update dtq conditions
This commit is contained in:
parent
80916f6c3e
commit
0a41d9ba82
11 changed files with 915 additions and 303 deletions
|
|
@ -1,18 +1,25 @@
|
|||
"""
|
||||
Drug Price Parser for Drugs.com
|
||||
|
||||
This module provides functionality to scrape drug pricing information
|
||||
from Drugs.com and return it in JSON format.
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from typing import Dict, List, Optional, Any
|
||||
from urllib.parse import quote_plus
|
||||
from typing import List
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class Dosage(BaseModel):
|
||||
name: str
|
||||
price: float
|
||||
|
||||
|
||||
class Formulation(BaseModel):
|
||||
name: str
|
||||
dosages: List[Dosage]
|
||||
|
||||
|
||||
class DrugPriceResponse(BaseModel):
|
||||
drug_name: str
|
||||
url: str
|
||||
prices: List[Formulation]
|
||||
status: str
|
||||
|
||||
|
||||
class DrugPriceParser:
|
||||
|
|
@ -39,7 +46,7 @@ class DrugPriceParser:
|
|||
'TE': 'trailers'
|
||||
})
|
||||
|
||||
def get_drug_prices(self, drug_name: str) -> Dict[str, Any]:
|
||||
def get_drug_prices(self, drug_name: str) -> DrugPriceResponse:
|
||||
"""
|
||||
Get pricing information for a specific drug.
|
||||
|
||||
|
|
@ -47,7 +54,7 @@ class DrugPriceParser:
|
|||
drug_name: Name of the drug (e.g., 'alprazolam')
|
||||
|
||||
Returns:
|
||||
Dictionary containing pricing information in JSON format
|
||||
DrugPriceResponse containing pricing information
|
||||
"""
|
||||
url = f"{self.BASE_URL}/{drug_name.lower()}#prices"
|
||||
user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:140.0) Gecko/20100101 Firefox/140.0'
|
||||
|
|
@ -59,49 +66,46 @@ class DrugPriceParser:
|
|||
|
||||
prices_data = self._extract_prices(soup, drug_name)
|
||||
|
||||
return {
|
||||
"drug_name": drug_name,
|
||||
"url": url,
|
||||
"prices": prices_data,
|
||||
"status": "success"
|
||||
}
|
||||
|
||||
|
||||
def _extract_prices(self, soup: BeautifulSoup, drug_name: str) -> Dict[str, Any]:
|
||||
return DrugPriceResponse(
|
||||
drug_name=drug_name,
|
||||
url=url,
|
||||
prices=prices_data,
|
||||
status="success"
|
||||
)
|
||||
|
||||
def _extract_prices(self, soup: BeautifulSoup, drug_name: str) -> List[Formulation]:
|
||||
"""Extract pricing information from the parsed HTML"""
|
||||
|
||||
prices_data = {
|
||||
"formulations": [],
|
||||
}
|
||||
formulations = []
|
||||
div_content = soup.find('div', {'id': 'content'})
|
||||
formulations = div_content.find_all('h3')
|
||||
for formulation in formulations:
|
||||
formulation_elements = div_content.find_all('h3')
|
||||
for formulation in formulation_elements:
|
||||
if formulation.get('class'):
|
||||
break
|
||||
formulation_name = formulation.get_text()
|
||||
formulation_data = {
|
||||
"name": formulation_name.rstrip(),
|
||||
"dosages": []
|
||||
}
|
||||
dosages = []
|
||||
dosages_table = formulation.find_next('div')
|
||||
dosages = dosages_table.find_all('details')
|
||||
for dosage in dosages:
|
||||
dosage_elements = dosages_table.find_all('details')
|
||||
for dosage in dosage_elements:
|
||||
summary = dosage.find('summary')
|
||||
spans = summary.find_all('span')
|
||||
dosage_name = spans[0].find('b').get_text()
|
||||
dosage_price = spans[1].find_next('b').get_text()
|
||||
formulation_data["dosages"].append({
|
||||
"name": dosage_name.rstrip(),
|
||||
"price": float(dosage_price.rstrip().replace('$', '').replace(',', ''))
|
||||
})
|
||||
prices_data["formulations"].append(formulation_data)
|
||||
return prices_data
|
||||
dosages.append(Dosage(
|
||||
name=dosage_name.rstrip(),
|
||||
price=float(dosage_price.rstrip().replace('$', '').replace(',', ''))
|
||||
))
|
||||
formulations.append(Formulation(
|
||||
name=formulation_name.rstrip(),
|
||||
dosages=dosages
|
||||
))
|
||||
return formulations
|
||||
|
||||
|
||||
def parse_drug_prices(drug_name: str) -> str:
|
||||
parser = DrugPriceParser()
|
||||
result = parser.get_drug_prices(drug_name)
|
||||
return json.dumps(result, indent=2)
|
||||
return result.model_dump_json(indent=2)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue