# Listing metadata (from the code viewer): 125 lines, 4.1 KiB, Python, no trailing newline.
from typing import List
|
|
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
from pydantic import BaseModel
|
|
|
|
|
|
class Dosage(BaseModel):
    """A single dosage entry of a formulation together with its price."""

    # Label of the dosage as shown on the page (trailing whitespace removed
    # by the parser before the model is built).
    name: str

    # Numeric price; the parser fills this from the per-unit price cell
    # after stripping the '$' sign and thousands separators.
    price: float
|
|
|
|
|
|
class Formulation(BaseModel):
    """A named drug formulation and the dosages listed under it."""

    # Text of the formulation heading found on the price page.
    name: str

    # Dosage entries collected for this formulation; may be empty.
    dosages: List[Dosage]
|
|
|
|
|
|
class DrugPriceResponse(BaseModel):
    """Result of one price lookup, ready to be serialized to JSON."""

    # Drug name exactly as supplied by the caller.
    drug_name: str

    # Price-guide URL that was requested for this drug.
    url: str

    # Formulations (each with its dosages) extracted from the page.
    prices: List[Formulation]

    # Outcome marker for the lookup; the parser sets it to "success".
    status: str
|
|
|
|
|
|
class DrugPriceParser:
    """Parser for extracting drug pricing information from Drugs.com.

    One ``requests.Session`` configured with browser-like headers is created
    per parser instance and reused for every lookup made through it.
    """

    BASE_URL = "https://www.drugs.com/price-guide"
    USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:140.0) Gecko/20100101 Firefox/140.0'
    REQUEST_TIMEOUT = 15  # seconds, applied to every HTTP request

    def __init__(self):
        """Create the HTTP session and install the browser-like headers."""
        self.session = requests.Session()
        # 'Accept-Encoding' is intentionally NOT overridden here: requests
        # already advertises exactly the encodings it is able to decode.
        # Claiming 'br' / 'zstd' without the optional decoder packages
        # installed would leave ``response.content`` compressed, and the
        # HTML parser would then see binary garbage instead of markup.
        self.session.headers.update({
            'User-Agent': self.USER_AGENT,
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Sec-GPC': '1',
            'Connection': 'keep-alive',
            'Cookie': 'ddc-pvc=8; ddcsubscribe=disabled',
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-User': '?1',
            'Priority': 'u=0, i',
            'TE': 'trailers',
        })

    def get_drug_prices(self, drug_name: str) -> "DrugPriceResponse":
        """Get pricing information for a specific drug.

        Args:
            drug_name: Name of the drug (e.g., 'alprazolam'). It is
                lower-cased before being placed in the URL.

        Returns:
            DrugPriceResponse containing the requested URL, the extracted
            formulations and a ``status`` of ``"success"``.

        Raises:
            requests.HTTPError: The server answered with a 4xx/5xx status
                (for example, the drug has no price-guide page).
            requests.RequestException: Any other network-level failure,
                including the request timing out.
        """
        url = f"{self.BASE_URL}/{drug_name.lower()}#prices"

        # The User-Agent is installed once in __init__; re-applying it on
        # every call would silently undo any customization a caller made on
        # ``self.session.headers``.
        response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly on error pages instead of parsing them and reporting
        # a misleading "success" with empty or bogus data.
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        return DrugPriceResponse(
            drug_name=drug_name,
            url=url,
            prices=self._extract_prices(soup, drug_name),
            status="success",
        )

    def _extract_prices(self, soup: "BeautifulSoup", drug_name: str) -> "List[Formulation]":
        """Extract pricing information from the parsed HTML.

        Args:
            soup: Parsed price-guide page.
            drug_name: Unused by the extraction itself; kept so the
                signature stays compatible with existing callers.

        Returns:
            One Formulation per unclassed ``<h3>`` heading inside
            ``div#content``, in document order. The list is empty when the
            page has no ``div#content`` element.
        """
        content = soup.find('div', {'id': 'content'})
        if content is None:
            return []

        formulations = []
        for heading in content.find_all('h3'):
            # Formulation headings carry no CSS class. Scanning stops at the
            # first classed <h3> - presumably the start of the non-price
            # sections of the page (TODO confirm against the live markup).
            if heading.get('class'):
                break

            # The dosage list is looked up in the first <div> that follows
            # the heading in document order.
            container = heading.find_next('div')
            dosages = self._collect_dosages(container) if container is not None else []

            formulations.append(Formulation(
                name=heading.get_text().rstrip(),
                dosages=dosages,
            ))
        return formulations

    def _collect_dosages(self, container) -> "List[Dosage]":
        """Build a Dosage for every well-formed ``<details>`` in *container*.

        Entries missing the expected table, price cell, summary label, or
        carrying a non-numeric price are skipped so that one malformed entry
        does not abort the whole page.
        """
        dosages = []
        for details in container.find_all('details'):
            table = details.find('table')
            summary = details.find('summary')
            if table is None or summary is None:
                continue

            # The first right-aligned cell of the quantity table is read as
            # the per-unit price.
            price_cells = table.find_all('td', {'class': 'ddc-text-right'})
            # The dosage label is the bold text inside the summary's first span.
            first_span = summary.find('span')
            label = first_span.find('b') if first_span is not None else None
            if not price_cells or label is None:
                continue

            try:
                unit_price = self._parse_price(price_cells[0].get_text())
            except ValueError:
                continue

            dosages.append(Dosage(
                name=label.get_text().rstrip(),
                price=unit_price,
            ))
        return dosages

    @staticmethod
    def _parse_price(text: str) -> float:
        """Convert price cell text such as ``'$1,234.50 per unit'`` to a float.

        Only the first whitespace-separated token is used; the ``$`` sign
        and thousands separators are removed before conversion.

        Raises:
            ValueError: The text is blank or its first token is not numeric.
        """
        # split() without arguments also discards leading whitespace and
        # newlines, which split(" ") would turn into an empty first token.
        tokens = text.split()
        if not tokens:
            raise ValueError("price cell is empty")
        return float(tokens[0].replace('$', '').replace(',', ''))
|
|
|
|
|
|
def parse_drug_prices(drug_name: str) -> str:
    """Look up *drug_name* and return the result as indented JSON text.

    A fresh DrugPriceParser (and therefore a fresh HTTP session) is created
    for each call.

    Args:
        drug_name: Name of the drug to look up.

    Returns:
        The DrugPriceResponse serialized with a two-space indent.
    """
    response_model = DrugPriceParser().get_drug_prices(drug_name)
    return response_model.model_dump_json(indent=2)
|
|
|
|
|
|
if __name__ == "__main__":
    import sys

    # Command-line entry point: the drug name is the first argument; any
    # further arguments are ignored.
    if len(sys.argv) > 1:
        drug_name = sys.argv[1]
        result = parse_drug_prices(drug_name)
        print(result)
    else:
        # A missing argument is a usage error: report it on stderr and exit
        # with a non-zero status so shell scripts and pipelines can detect
        # the failed invocation instead of receiving help text as output.
        print("Usage: python drug_price_parser.py <drug_name>", file=sys.stderr)
        print("Example: python drug_price_parser.py alprazolam", file=sys.stderr)
        sys.exit(2)