add drug parser; add iha estimation rules

2025-07-31 22:08:02 +03:00 · 2025-07-31 22:08:02 +03:00 · 80916f6c3e
commit 80916f6c3e
parent bf1d988d36
10 changed files with 1271 additions and 15 deletions
--- a/src/drug_price_parser.py
+++ b/src/drug_price_parser.py
@ -0,0 +1,116 @@
+"""
+Drug Price Parser for Drugs.com
+
+This module provides functionality to scrape drug pricing information
+from Drugs.com and return it in JSON format.
+"""
+
+import json
+import re
+import time
+from typing import Dict, List, Optional, Any
+from urllib.parse import quote_plus
+
+import requests
+from bs4 import BeautifulSoup
+
+
+class DrugPriceParser:
+    """Parser for extracting drug pricing information from Drugs.com.
+
+    Each instance owns one ``requests.Session`` carrying browser-like default
+    headers; the session is reused for every lookup made through the instance.
+    """
+
+    BASE_URL = "https://www.drugs.com/price-guide"
+    USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:140.0) Gecko/20100101 Firefox/140.0'
+    # Seconds to wait for the server before the request is abandoned.
+    REQUEST_TIMEOUT = 15
+
+    def __init__(self):
+        """Create the HTTP session and install the default request headers."""
+        self.session = requests.Session()
+        # 'Accept-Encoding' is deliberately NOT overridden here: requests
+        # already advertises exactly the encodings it can decode in the
+        # current environment. Claiming 'br'/'zstd' without the optional
+        # decoder packages installed would leave response.content compressed
+        # and therefore unparseable.
+        self.session.headers.update({
+            'User-Agent': self.USER_AGENT,
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.5',
+            'Sec-GPC': '1',
+            'Connection': 'keep-alive',
+            'Cookie': 'ddc-pvc=8; ddcsubscribe=disabled',
+            'Upgrade-Insecure-Requests': '1',
+            'Sec-Fetch-Dest': 'document',
+            'Sec-Fetch-Mode': 'navigate',
+            'Sec-Fetch-Site': 'none',
+            'Sec-Fetch-User': '?1',
+            'Priority': 'u=0, i',
+            'TE': 'trailers'
+        })
+
+    def get_drug_prices(self, drug_name: str) -> Dict[str, Any]:
+        """
+        Get pricing information for a specific drug.
+
+        Args:
+            drug_name: Name of the drug (e.g., 'alprazolam'). Surrounding
+                whitespace is ignored and the name is lower-cased when the
+                page URL is built.
+
+        Returns:
+            Dictionary with the keys "drug_name" (as passed in), "url",
+            "prices" (see ``_extract_prices``) and "status".
+
+        Raises:
+            ValueError: If ``drug_name`` is empty or only whitespace.
+            requests.HTTPError: If the server answers with a 4xx/5xx status,
+                so an error page is never reported as a successful lookup.
+        """
+        slug = drug_name.strip().lower()
+        if not slug:
+            raise ValueError("drug_name must be a non-empty string")
+        url = f"{self.BASE_URL}/{slug}#prices"
+
+        response = self.session.get(url, timeout=self.REQUEST_TIMEOUT)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.content, 'html.parser')
+
+        return {
+            "drug_name": drug_name,
+            "url": url,
+            "prices": self._extract_prices(soup, drug_name),
+            "status": "success"
+        }
+
+    def _extract_prices(self, soup: BeautifulSoup, drug_name: str) -> Dict[str, Any]:
+        """Extract pricing information from the parsed HTML.
+
+        Walks the <h3> headings inside <div id="content">. Each heading names a
+        formulation, and the first <div> following it is searched for one
+        <details> element per dosage.
+
+        Args:
+            soup: Parsed price-guide page.
+            drug_name: Unused by the extraction; kept for interface stability.
+
+        Returns:
+            ``{"formulations": [{"name": str, "dosages": [{"name": str,
+            "price": float}, ...]}, ...]}``. The list is empty when the page
+            has no <div id="content">; dosage entries that do not have the
+            expected structure are skipped.
+        """
+        prices_data: Dict[str, Any] = {"formulations": []}
+
+        div_content = soup.find('div', {'id': 'content'})
+        if div_content is None:
+            return prices_data
+
+        for heading in div_content.find_all('h3'):
+            # Scanning stops at the first <h3> that carries a class attribute;
+            # presumably only the unclassed headings are formulation titles.
+            if heading.get('class'):
+                break
+
+            dosages_table = heading.find_next('div')
+            details = [] if dosages_table is None else dosages_table.find_all('details')
+
+            dosages = []
+            for detail in details:
+                entry = self._parse_dosage(detail)
+                if entry is not None:
+                    dosages.append(entry)
+
+            prices_data["formulations"].append({
+                "name": heading.get_text().rstrip(),
+                "dosages": dosages
+            })
+        return prices_data
+
+    @staticmethod
+    def _parse_dosage(details) -> Optional[Dict[str, Any]]:
+        """Turn one <details> element into a dosage dict, or None if malformed.
+
+        The dosage name is read from the <b> inside the first <span> of the
+        <summary>; the price from the first <b> found after the second <span>.
+        """
+        summary = details.find('summary')
+        if summary is None:
+            return None
+
+        spans = summary.find_all('span')
+        if len(spans) < 2:
+            return None
+
+        name_tag = spans[0].find('b')
+        price_tag = spans[1].find_next('b')
+        if name_tag is None or price_tag is None:
+            return None
+
+        # Prices are rendered like "$1,234.56"; drop the currency symbol and
+        # thousands separators before converting.
+        price_text = price_tag.get_text().strip().replace('$', '').replace(',', '')
+        try:
+            price = float(price_text)
+        except ValueError:
+            return None
+
+        return {
+            "name": name_tag.get_text().rstrip(),
+            "price": price
+        }
+
+
+def parse_drug_prices(drug_name: str) -> str:
+    """Look up prices for ``drug_name`` and return them as a JSON string.
+
+    A fresh ``DrugPriceParser`` is created for the call; its result dictionary
+    is serialized with two-space indentation.
+    """
+    return json.dumps(DrugPriceParser().get_drug_prices(drug_name), indent=2)
+
+
+if __name__ == "__main__":
+    import sys
+
+    # Command-line entry point: the first argument is the drug name; any
+    # further arguments are ignored.
+    if len(sys.argv) > 1:
+        print(parse_drug_prices(sys.argv[1]))
+    else:
+        # A missing argument is a usage error: report it on stderr and exit
+        # non-zero so shell pipelines and scripts can detect the failure.
+        print("Usage: python drug_price_parser.py <drug_name>", file=sys.stderr)
+        print("Example: python drug_price_parser.py alprazolam", file=sys.stderr)
+        sys.exit(2)