update dtq conditions

This commit is contained in:
ipu 2025-08-05 00:00:58 +03:00
parent 80916f6c3e
commit 0a41d9ba82
11 changed files with 915 additions and 303 deletions

View file

@ -1,18 +1,25 @@
"""
Drug Price Parser for Drugs.com
This module provides functionality to scrape drug pricing information
from Drugs.com and return it in JSON format.
"""
import json
import re
import time
from typing import Dict, List, Optional, Any
from urllib.parse import quote_plus
from typing import List
import requests
from bs4 import BeautifulSoup
from pydantic import BaseModel
class Dosage(BaseModel):
    # One dosage entry listed under a formulation, with its parsed price.

    # Dosage label; the parser stores it with trailing whitespace removed.
    name: str
    # Price as a number; the parser strips '$' and ',' before converting.
    price: float
class Formulation(BaseModel):
    # A named formulation together with the dosage entries found under it.

    # Text of the formulation heading, trailing whitespace removed.
    name: str
    # Dosage entries in the order they were parsed.
    dosages: List[Dosage]
class DrugPriceResponse(BaseModel):
    # Result object returned by DrugPriceParser.get_drug_prices.

    # Drug name exactly as the caller supplied it.
    drug_name: str
    # URL of the price page that was requested.
    url: str
    # Formulations extracted from the page.
    prices: List[Formulation]
    # Outcome marker; get_drug_prices sets "success" on its normal return path.
    status: str
class DrugPriceParser:
@ -39,7 +46,7 @@ class DrugPriceParser:
'TE': 'trailers'
})
def get_drug_prices(self, drug_name: str) -> Dict[str, Any]:
def get_drug_prices(self, drug_name: str) -> DrugPriceResponse:
"""
Get pricing information for a specific drug.
@ -47,7 +54,7 @@ class DrugPriceParser:
drug_name: Name of the drug (e.g., 'alprazolam')
Returns:
Dictionary containing pricing information in JSON format
DrugPriceResponse containing pricing information
"""
url = f"{self.BASE_URL}/{drug_name.lower()}#prices"
user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:140.0) Gecko/20100101 Firefox/140.0'
@ -59,49 +66,46 @@ class DrugPriceParser:
prices_data = self._extract_prices(soup, drug_name)
return {
"drug_name": drug_name,
"url": url,
"prices": prices_data,
"status": "success"
}
def _extract_prices(self, soup: BeautifulSoup, drug_name: str) -> Dict[str, Any]:
return DrugPriceResponse(
drug_name=drug_name,
url=url,
prices=prices_data,
status="success"
)
def _extract_prices(self, soup: BeautifulSoup, drug_name: str) -> List[Formulation]:
    """Extract pricing information from the parsed HTML.

    Walks the ``<h3>`` headings inside ``<div id="content">``. Each heading
    names a formulation, and the first ``<div>`` following the heading is
    searched for ``<details>`` elements, one per dosage.

    Args:
        soup: Parsed price page.
        drug_name: Name of the drug; not used by the extraction itself.

    Returns:
        One Formulation per processed heading, in document order, each
        holding the Dosage entries found for it.

    Note:
        No guards are applied. A page lacking the expected elements
        surfaces as AttributeError or IndexError, and a price that is not
        numeric after removing '$' and ',' surfaces as ValueError.
    """
    formulations = []
    div_content = soup.find('div', {'id': 'content'})
    formulation_elements = div_content.find_all('h3')
    for formulation in formulation_elements:
        # Stop at the first heading that carries a class attribute.
        # NOTE(review): presumably class-bearing headings belong to page
        # sections after the price list - confirm against the live markup.
        if formulation.get('class'):
            break
        formulation_name = formulation.get_text()
        dosages = []
        dosages_table = formulation.find_next('div')
        dosage_elements = dosages_table.find_all('details')
        for dosage in dosage_elements:
            summary = dosage.find('summary')
            spans = summary.find_all('span')
            # The first span's <b> holds the dosage label; the price is the
            # first <b> found after the second span starts.
            dosage_name = spans[0].find('b').get_text()
            dosage_price = spans[1].find_next('b').get_text()
            dosages.append(Dosage(
                name=dosage_name.rstrip(),
                price=float(dosage_price.rstrip().replace('$', '').replace(',', ''))
            ))
        formulations.append(Formulation(
            name=formulation_name.rstrip(),
            dosages=dosages
        ))
    return formulations
def parse_drug_prices(drug_name: str) -> str:
    """Fetch prices for *drug_name* and return them as a JSON string.

    Args:
        drug_name: Name of the drug to look up.

    Returns:
        The DrugPriceResponse from DrugPriceParser.get_drug_prices,
        serialized as JSON with a two-space indent.
    """
    parser = DrugPriceParser()
    result = parser.get_drug_prices(drug_name)
    # get_drug_prices returns a pydantic model, which json.dumps cannot
    # serialize; use the model's own JSON serializer instead.
    return result.model_dump_json(indent=2)
if __name__ == "__main__":