import logging
import os
import re
import sys
from urllib.parse import parse_qs, urljoin, urlsplit

import mysql.connector
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# Configure the root logger at import time so INFO-level (and above) messages
# from this script are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by everything in this file.
logger = logging.getLogger(__name__)

class BillListingCollector:
    """Collect bill-listing links from ilga.gov and store them in MySQL.

    On construction the collector loads database credentials from
    ``<project_root>/app/.env`` and reads the General Assembly number from
    ``<project_root>/app/config.php``. ``collect_all_listings`` then fetches
    the legislation index page for each configured session and upserts every
    ``grplist.asp`` link it recognises into the ``bill_listings`` table.
    """

    # Seconds to wait on ilga.gov; requests never times out unless told to.
    REQUEST_TIMEOUT = 30

    # Chamber -> {document name stored in the DB -> DocTypeID code in the URL}.
    DOC_TYPES = {
        'Senate': {
            'Bills': 'SB',
            'Resolutions': 'SR',
            'Joint Resolutions': 'SJR',
            'Joint Resolution Constitutional Amendments': 'SJRCA',
        },
        'House': {
            'Bills': 'HB',
            'Resolutions': 'HR',
            'Joint Resolutions': 'HJR',
            'Joint Resolution Constitutional Amendments': 'HJRCA',
        },
    }

    # Upsert statement; the ON DUPLICATE KEY clause presumably relies on a
    # unique key defined on bill_listings -- verify against the schema.
    _INSERT_SQL = """INSERT INTO bill_listings
                     (chamber, link_name, link_url, doc_type, session_type)
                     VALUES (%s, %s, %s, %s, %s)
                     ON DUPLICATE KEY UPDATE
                     link_url = VALUES(link_url)"""

    def __init__(self):
        """Load configuration needed to scrape and store listings.

        Raises:
            FileNotFoundError: if the ``.env`` file or ``config.php`` is missing.
            ValueError: if a required DB variable is unset/empty or
                ``GENERAL_ASSEMBLY`` cannot be found in ``config.php``.
        """
        self.base_url = "https://ilga.gov/legislation/default.asp"

        # The project root is two directories above the one containing this file.
        self.project_root = os.path.dirname(
            os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        )

        # Load environment variables from the application's .env file.
        env_path = os.path.join(self.project_root, 'app', '.env')
        logger.info("Looking for .env file at: %s", env_path)
        if not os.path.exists(env_path):
            logger.error(".env file not found at %s", env_path)
            raise FileNotFoundError(f".env file not found at {env_path}")

        load_dotenv(env_path)

        # Keyword arguments later passed to mysql.connector.connect().
        self.db_config = {
            'host': os.getenv('DB_HOST'),
            'user': os.getenv('DB_USER'),
            'password': os.getenv('DB_PASS'),
            'database': os.getenv('DB_NAME')
        }

        # Fail early, naming every variable that is unset or empty.
        required_vars = ['DB_HOST', 'DB_USER', 'DB_PASS', 'DB_NAME']
        missing_vars = [var for var in required_vars if not os.getenv(var)]
        if missing_vars:
            message = (
                "Missing required environment variables: "
                f"{', '.join(missing_vars)}"
            )
            logger.error(message)
            raise ValueError(message)

        logger.info("Successfully loaded environment variables")

        # GENERAL_ASSEMBLY lives in the PHP config so both code bases agree.
        self.general_assembly = self.get_general_assembly()

        # Session type name -> SessionID query parameter value.
        self.session_types = {
            "regular": 1,
            "special": 2
        }

    def get_config_content(self):
        """Return the text of ``<project_root>/app/config.php``.

        The file is decoded as UTF-8 with undecodable bytes replaced, so the
        result does not depend on the platform's default encoding.

        Raises:
            FileNotFoundError: if the config file does not exist.
        """
        config_path = os.path.join(self.project_root, 'app', 'config.php')
        try:
            with open(config_path, 'r', encoding='utf-8', errors='replace') as f:
                return f.read()
        except FileNotFoundError:
            logger.error("Config file not found at: %s", config_path)
            raise

    def get_general_assembly(self):
        """Return the integer ``GENERAL_ASSEMBLY`` constant from ``config.php``.

        Whitespace around ``=`` and before ``;`` is tolerated.

        Raises:
            ValueError: if no ``const GENERAL_ASSEMBLY = <digits>;`` is found.
        """
        content = self.get_config_content()
        match = re.search(r'const\s+GENERAL_ASSEMBLY\s*=\s*(\d+)\s*;', content)
        if match:
            return int(match.group(1))
        raise ValueError("Could not find GENERAL_ASSEMBLY in config.php")

    def connect_to_db(self):
        """Open and return a new MySQL connection built from ``self.db_config``.

        Raises:
            mysql.connector.Error: if the connection cannot be established.
        """
        try:
            return mysql.connector.connect(**self.db_config)
        except mysql.connector.Error as e:
            logger.error("Error connecting to database: %s", e)
            raise

    @staticmethod
    def _listing_doc_type(href):
        """Return the exact ``DocTypeID`` value of a ``grplist.asp`` href.

        Returns ``None`` when *href* is empty, does not mention
        ``grplist.asp``, or carries no non-empty ``DocTypeID`` parameter.
        Parsing the query string (rather than substring matching) keeps
        ``SJR`` from also matching ``SJRCA`` and ``HJR`` from matching
        ``HJRCA``.
        """
        if not href or 'grplist.asp' not in href:
            return None
        values = parse_qs(urlsplit(href).query).get('DocTypeID')
        if not values:
            return None
        return values[0].strip() or None

    def collect_listings(self, session_type, session_id):
        """Fetch one session's index page and upsert its listing links.

        Args:
            session_type: label stored in the ``session_type`` column and
                used in log messages.
            session_id: value sent as the ``SessionID`` query parameter.

        A failure to store an individual row is logged and skipped; rows that
        were stored are still committed. Any other error is logged and
        re-raised, and the cursor/connection are closed in all cases.
        """
        try:
            url = f"{self.base_url}?GA={self.general_assembly}&SessionID={session_id}"
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')

            # Bucket listing links by their exact DocTypeID in a single pass.
            listings_by_code = {}
            for anchor in soup.find_all('a', href=True):
                href = anchor['href'].strip()
                code = self._listing_doc_type(href)
                if code is None:
                    continue
                # urljoin copes with relative, root-relative and absolute hrefs.
                listings_by_code.setdefault(code, []).append(
                    (anchor.text.strip(), urljoin(self.base_url, href))
                )

            conn = self.connect_to_db()
            try:
                cursor = conn.cursor()
                try:
                    # Iterate in DOC_TYPES order so rows are written grouped
                    # by chamber and document type.
                    for chamber, types in self.DOC_TYPES.items():
                        for doc_name, doc_type in types.items():
                            for link_name, link_url in listings_by_code.get(doc_type, ()):
                                values = (chamber, link_name, link_url, doc_name, session_type)
                                try:
                                    cursor.execute(self._INSERT_SQL, values)
                                    logger.info(
                                        "Stored %s %s listing: %s",
                                        chamber, doc_name, link_name,
                                    )
                                except mysql.connector.Error as e:
                                    # Best effort: keep going with the other rows.
                                    logger.error("Error storing listing: %s", e)
                    conn.commit()
                finally:
                    cursor.close()
            finally:
                conn.close()

        except Exception as e:
            logger.error("Error collecting listings for %s session: %s", session_type, e)
            raise

    def collect_all_listings(self):
        """Collect listings for every entry in ``self.session_types``.

        Stops at the first session whose collection raises.
        """
        for session_type, session_id in self.session_types.items():
            logger.info("Collecting listings for %s session...", session_type)
            self.collect_listings(session_type, session_id)

if __name__ == "__main__":
    # Script entry point: build a collector, scrape every configured session,
    # and exit with status 1 if any step raises.
    try:
        BillListingCollector().collect_all_listings()
        logger.info("Bill listing collection completed successfully")
    except Exception as exc:
        logger.error(f"Error in main: {exc}")
        sys.exit(1)