"""Scrape drug pricing information from the Drugs.com price guide."""

import logging
from typing import List, Optional
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from pydantic import BaseModel

logger = logging.getLogger(__name__)


class Dosage(BaseModel):
    """One dosage entry of a formulation together with its parsed price."""

    # Text of the first <b> inside the first <span> of the entry's <summary>.
    name: str
    # First token of the first `td.ddc-text-right` cell, with "$" and ","
    # removed (the parser treats this cell as the price per unit).
    price: float


class Formulation(BaseModel):
    """A formulation heading (<h3>) and the dosages listed under it."""

    name: str
    dosages: List[Dosage]


class DrugPriceResponse(BaseModel):
    """Result of a price lookup for a single drug."""

    drug_name: str
    url: str
    prices: List[Formulation]
    # Always "success" when returned by DrugPriceParser.get_drug_prices;
    # failures are reported by raising instead.
    status: str


class DrugPriceParser:
    """Parser for extracting drug pricing information from Drugs.com"""

    BASE_URL = "https://www.drugs.com/price-guide"
    USER_AGENT = (
        'Mozilla/5.0 (X11; Linux x86_64; rv:140.0) '
        'Gecko/20100101 Firefox/140.0'
    )
    REQUEST_TIMEOUT = 15  # seconds

    def __init__(self):
        """Create an HTTP session pre-loaded with browser-like headers."""
        self.session = requests.Session()
        # NOTE: 'Accept-Encoding' is deliberately NOT overridden here.
        # Advertising "br, zstd" makes the server free to answer with an
        # encoding that requests can only decode when the optional
        # brotli/zstandard packages are installed; otherwise the parser would
        # be fed compressed bytes. The session default advertises exactly
        # the encodings that can be decoded.
        self.session.headers.update({
            'User-Agent': self.USER_AGENT,
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Sec-GPC': '1',
            'Connection': 'keep-alive',
            'Cookie': 'ddc-pvc=8; ddcsubscribe=disabled',
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-User': '?1',
            'Priority': 'u=0, i',
            'TE': 'trailers',
        })

    def get_drug_prices(self, drug_name: str) -> DrugPriceResponse:
        """
        Get pricing information for a specific drug.

        Args:
            drug_name: Name of the drug (e.g., 'alprazolam')

        Returns:
            DrugPriceResponse containing pricing information

        Raises:
            ValueError: If drug_name is empty or only whitespace.
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            requests.RequestException: On connection errors or timeouts.
        """
        # Percent-encode the name so characters such as "/", "?" or "#"
        # cannot alter the path or query of the requested URL.
        slug = quote(drug_name.strip().lower(), safe="")
        if not slug:
            raise ValueError("drug_name must not be empty")

        url = f"{self.BASE_URL}/{slug}#prices"
        response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
        # Do not parse an error page and then report it as a success.
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        return DrugPriceResponse(
            drug_name=drug_name,
            url=url,
            prices=self._extract_prices(soup, drug_name),
            status="success",
        )

    def _extract_prices(self, soup: BeautifulSoup, drug_name: str) -> List[Formulation]:
        """Extract pricing information from the parsed HTML.

        Every <h3> inside ``div#content`` is treated as a formulation
        heading, up to (not including) the first <h3> that carries a
        ``class`` attribute. The dosages of a formulation are the <details>
        elements inside the first <div> following its heading.

        Args:
            soup: Parsed price-guide page.
            drug_name: Unused; kept so existing callers keep working.

        Returns:
            The formulations found, in document order. Empty when the page
            has no ``div#content``. Dosage entries that do not match the
            expected markup are skipped rather than aborting the whole parse.
        """
        content = soup.find('div', {'id': 'content'})
        if content is None:
            logger.debug("No div#content found; returning no formulations")
            return []

        formulations = []
        for heading in content.find_all('h3'):
            # A heading with a class attribute ends the run of formulation
            # headings, so stop scanning there.
            if heading.get('class'):
                break

            container = heading.find_next('div')
            detail_blocks = container.find_all('details') if container is not None else []
            parsed = (self._parse_dosage(block) for block in detail_blocks)
            formulations.append(Formulation(
                name=heading.get_text().rstrip(),
                dosages=[dosage for dosage in parsed if dosage is not None],
            ))
        return formulations

    @staticmethod
    def _parse_dosage(details) -> Optional[Dosage]:
        """Build a Dosage from one <details> element, or None if malformed."""
        table = details.find('table')
        summary = details.find('summary')
        if table is None or summary is None:
            logger.debug("Skipping dosage entry without table/summary")
            return None

        price_cells = table.find_all('td', {'class': 'ddc-text-right'})
        name_span = summary.find('span')
        name_tag = name_span.find('b') if name_span is not None else None
        if not price_cells or name_tag is None:
            logger.debug("Skipping dosage entry without price cell or name")
            return None

        # The price is the first whitespace-separated token of the cell.
        tokens = price_cells[0].get_text().split()
        if not tokens:
            logger.debug("Skipping dosage entry with empty price cell")
            return None
        price_text = tokens[0].replace('$', '').replace(',', '')
        try:
            price = float(price_text)
        except ValueError:
            logger.debug("Skipping dosage entry with unparseable price %r", price_text)
            return None

        return Dosage(name=name_tag.get_text().rstrip(), price=price)


def parse_drug_prices(drug_name: str) -> str:
    """Look up prices for *drug_name* and return them as indented JSON.

    Raises whatever DrugPriceParser.get_drug_prices raises.
    """
    parser = DrugPriceParser()
    try:
        result = parser.get_drug_prices(drug_name)
    finally:
        # Release the pooled connections held by the session.
        parser.session.close()
    return result.model_dump_json(indent=2)


if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1:
        print(parse_drug_prices(sys.argv[1]))
    else:
        print("Usage: python drug_price_parser.py <drug_name>")
        print("Example: python drug_price_parser.py alprazolam")