import codecs
import json
import logging
import os
import sys
import time
import xml.etree.ElementTree as ET
from typing import Dict, List, Optional, Tuple

import mysql.connector
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from mysql.connector import Error

from config import CURRENT_SESSION, SENATE_MEMBERS_URL, HOUSE_MEMBERS_URL

# Load environment variables (the DB_* settings are read via os.getenv below)
load_dotenv()

# Configure logging: DEBUG and above goes to a log file and to the console.
# The log directory must exist before FileHandler opens the file; without
# this, importing the module on a fresh checkout raises FileNotFoundError.
os.makedirs('logs', exist_ok=True)
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # mode='w' overwrites the previous run's log. UTF-8 is explicit
        # because messages contain emoji and accented names that a
        # platform-default codec such as cp1252 cannot always encode.
        logging.FileHandler('logs/scraper.log', mode='w', encoding='utf-8'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

class ILGAScraper:
    def __init__(self):
        """Set up the HTTP session, file paths and database schema."""
        # JSON files for unmatched names and for manual name -> ID fixes
        self.errors_file = 'errors.json'
        self.corrections_file = 'error_check.json'
        self.current_session = CURRENT_SESSION

        # Browser-like headers that also ask the server for UTF-8 content
        request_headers = {
            'Accept-Charset': 'utf-8',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
        }
        http_session = requests.Session()
        http_session.headers.update(request_headers)
        self.session = http_session

        # Bring the schema up to date first, then seed the JSON files
        self.setup_database()
        self.ensure_error_files_exist()
    
    def ensure_error_files_exist(self):
        """Create error and correction files if they don't exist."""
        if not os.path.exists(self.errors_file):
            with open(self.errors_file, 'w') as f:
                json.dump([], f, indent=2)
        
        if not os.path.exists(self.corrections_file):
            with open(self.corrections_file, 'w') as f:
                json.dump({}, f, indent=2)
    
    def load_corrections(self) -> Dict[str, int]:
        """Load manual name -> member ID corrections from the corrections file.

        Returns:
            A dict mapping legislator names to member IDs. Placeholder entries
            whose value is ``None`` or an empty dict are dropped. An empty
            dict is returned (after logging an error) when the file cannot be
            read, is not valid JSON, or does not hold a JSON object at the
            top level.
        """
        try:
            # Keys are names that may contain accented characters, so read
            # as UTF-8 rather than the platform default encoding.
            with open(self.corrections_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError
            logger.error(f"Error loading corrections file: {e}")
            return {}

        if not isinstance(loaded, dict):
            # Callers index the result by name; a list or scalar is unusable
            logger.error(
                "Error loading corrections file: expected a JSON object, "
                f"got {type(loaded).__name__}"
            )
            return {}

        # Drop placeholder entries that carry no usable member ID
        corrections = {
            name: member_id
            for name, member_id in loaded.items()
            if member_id is not None and member_id != {}
        }
        logger.info(f"Loaded {len(corrections)} manual corrections")
        for name, member_id in corrections.items():
            logger.info(f"  Found correction: {name} -> {member_id}")
        return corrections
    
    def update_errors_file(self, errors: List[Dict]):
        """Replace the errors file with this run's unresolved errors.

        Errors whose ``name`` already has an entry in the corrections file
        are filtered out; the remainder are logged and written as a JSON
        list. The file is overwritten on every call, so it only ever reflects
        the most recent call.

        Args:
            errors: Error records; ``name`` is read from every record and
                ``chamber`` from those that get logged.
        """
        try:
            # Anything with a manual correction is no longer an open error
            corrections = self.load_corrections()
            new_errors = [
                error for error in errors
                if error['name'] not in corrections
            ]

            if new_errors:
                logger.info(f"Found {len(new_errors)} new errors:")
                for error in new_errors:
                    logger.info(f"  {error['name']} ({error['chamber']})")

            # ensure_ascii=False emits accented names verbatim, so the file
            # encoding must be explicit to stay portable across platforms.
            with open(self.errors_file, 'w', encoding='utf-8') as f:
                json.dump(new_errors, f, indent=2, ensure_ascii=False)

        except Exception as e:
            # Best effort: failing to record errors must not abort the scrape
            logger.error(f"Error updating errors file: {e}")
    
    def get_db_connection(self):
        """Open a MySQL connection configured from the DB_* environment variables.

        Returns the connection, or None (after logging the error) when the
        connection attempt fails.
        """
        # connect() keyword -> environment variable that supplies its value
        env_by_option = {
            'host': 'DB_HOST',
            'database': 'DB_NAME',
            'user': 'DB_USER',
            'password': 'DB_PASS',
        }
        connect_kwargs = {
            option: os.getenv(variable)
            for option, variable in env_by_option.items()
        }
        try:
            return mysql.connector.connect(**connect_kwargs)
        except Error as exc:
            logger.error(f"Error connecting to MySQL: {exc}")
            return None
    
    def setup_database(self):
        """Add every column the scraper needs that ``legislators`` lacks.

        For each required column, ``information_schema.columns`` is queried
        for the connection's current database and an ``ALTER TABLE ... ADD
        COLUMN`` is issued when the column is absent. Does nothing when no
        database connection can be obtained; MySQL errors are logged, not
        raised.
        """
        # (column name, column definition), in the order they are added.
        # The names and definitions are fixed literals, never user input, so
        # interpolating them into the ALTER statement below is safe.
        # NOTE: the session default (104) is hard-coded here.
        required_columns = (
            ('session', 'INT NOT NULL DEFAULT 104'),
            ('member_id', 'INT'),
            ('member_url', 'VARCHAR(255)'),
            ('headshot_url', 'VARCHAR(255)'),
            ('committees', 'TEXT'),
            ('years_served', 'VARCHAR(255)'),
            ('district_office_address', 'VARCHAR(255)'),
            ('district_office_phone', 'VARCHAR(50)'),
            ('district_office_fax', 'VARCHAR(50)'),
            ('biography', 'TEXT'),
            ('associated_legislator_name', 'VARCHAR(255)'),
            ('associated_legislator_id', 'INT'),
        )

        connection = self.get_db_connection()
        if not connection:
            return

        cursor = None  # stays None if connection.cursor() itself fails
        try:
            cursor = connection.cursor()

            for column_name, column_definition in required_columns:
                # Restrict the lookup to the current database: without the
                # table_schema filter, a `legislators` table in any other
                # schema on the server could make a missing column look
                # present.
                cursor.execute(
                    """
                    SELECT COUNT(*)
                    FROM information_schema.columns
                    WHERE table_schema = DATABASE()
                    AND table_name = 'legislators'
                    AND column_name = %s
                    """,
                    (column_name,),
                )

                if cursor.fetchone()[0] == 0:
                    cursor.execute(
                        f"ALTER TABLE legislators "
                        f"ADD COLUMN {column_name} {column_definition}"
                    )

            connection.commit()
            logger.info("Database schema updated successfully")
        except Error as e:
            logger.error(f"Error setting up database: {e}")
        finally:
            if connection.is_connected():
                if cursor is not None:
                    cursor.close()
                connection.close()
    
    def test_connection(self):
        """Log whether a database connection can be opened, then release it."""
        conn = self.get_db_connection()
        if not conn:
            logger.error("❌ Database connection failed")
            return
        logger.info("✅ Database connection successful")
        conn.close()
    
    def fetch_xml(self, url: str) -> Optional[str]:
        """Download ``url`` and return its body decoded to text.

        The raw bytes are decoded as UTF-8 (a leading BOM is dropped); if that
        fails, as Windows-1252; and finally as Latin-1, which accepts every
        byte sequence.

        Args:
            url: Address of the document to fetch.

        Returns:
            The decoded body, or None when the request fails (the error is
            logged).
        """
        try:
            # requests waits indefinitely by default; a timeout keeps one
            # stalled response from hanging the whole run. Timeouts are
            # RequestException subclasses, so they are handled below.
            response = self.session.get(url, timeout=30)
            response.raise_for_status()
            raw_content = response.content
        except requests.RequestException as e:
            logger.error(f"Error fetching XML from {url}: {e}")
            return None

        # Log raw bytes for problematic names under several candidate
        # encodings, to help diagnose mis-encoded accents in the feed.
        problematic_patterns = [b'Guzm', b'Gonz', b'Jim', b'Ort']
        for line in raw_content.split(b'\n'):
            if any(pattern in line for pattern in problematic_patterns):
                logger.debug(f"Raw bytes: {line}")
                logger.debug(f"UTF-8: {line.decode('utf-8', errors='replace')}")
                logger.debug(f"Latin1: {line.decode('latin1', errors='replace')}")
                logger.debug(f"CP1252: {line.decode('cp1252', errors='replace')}")
                logger.debug("---")

        # Strict decoders first. cp1252 must precede latin1: latin1 maps
        # every byte and never raises, so anything listed after it would be
        # unreachable.
        for encoding in ('utf-8-sig', 'cp1252'):
            try:
                return raw_content.decode(encoding)
            except UnicodeDecodeError:
                continue
        return raw_content.decode('latin1')
    
    def parse_legislator_xml(self, text: str, chamber: str) -> List[Dict]:
        if not text:
            return []
        
        legislators = []
        seen_names = set()
        
        try:
            # Parse the XML
            root = ET.fromstring(text)
            
            # Find all legislator elements
            for member in root.findall('.//Member'):
                # Extract and normalize the name first
                raw_name = member.findtext('Name', '').strip()
                name = self.normalize_text(raw_name)
                
                # Debug log for problematic names
                if any(x in raw_name for x in ['Guzm', 'Gonz', 'Jim', 'Ort']):
                    logger.debug(f"Processing name - Raw: {repr(raw_name)}, Normalized: {repr(name)}")
                
                party = self.normalize_text(member.findtext('Party', '').strip())
                district = self.normalize_text(member.findtext('District', '').strip())
                
                # Get address information
                capitol_address = member.find('CapitolAddress')
                if capitol_address is not None:
                    address_line1 = self.normalize_text(capitol_address.findtext('AddressLine1', '').strip())
                    address_line2 = self.normalize_text(capitol_address.findtext('AddressLine2', '').strip())
                    city = self.normalize_text(capitol_address.findtext('City', '').strip())
                    state = self.normalize_text(capitol_address.findtext('State', '').strip())
                    zip_code = self.normalize_text(capitol_address.findtext('Zip', '').strip())
                    phone = self.normalize_text(capitol_address.findtext('PhoneNumber', '').strip())
                else:
                    address_line1 = address_line2 = city = state = zip_code = phone = ''
                
                # Skip if we've seen this name
                if name in seen_names:
                    logger.debug(f"Skipping duplicate: {name}")
                    continue
                seen_names.add(name)
                
                legislator = {
                    'name': name,
                    'district': district,
                    'party': party,
                    'chamber': chamber,
                    'email': '',  # Email not present in XML
                    'phone': phone,
                    'website': '',  # Website not present in XML
                    'session': self.current_session,
                    'address_line1': address_line1,
                    'address_line2': address_line2,
                    'city': city,
                    'state': state,
                    'zip_code': zip_code
                }
                
                legislators.append(legislator)
                logger.info(f"Found legislator: {name} ({district})")
            
            logger.info(f"Found {len(legislators)} legislators for {chamber}")
            return legislators
            
        except ET.ParseError as e:
            logger.error(f"Error parsing XML: {e}")
            return []
    
    def normalize_text(self, text: str) -> str:
        """Normalize text to handle special characters."""
        try:
            # Log the raw bytes for problematic names
            if any(x in text for x in ['Guzm', 'Gonz', 'Jim', 'Ort']):
                logger.debug(f"Before normalization: {text}")
                logger.debug(f"Raw bytes: {text.encode('raw_unicode_escape')}")
            
            # Map of known problematic characters and their correct replacements
            char_map = {
                'ý': 'á',  # Common incorrect encoding
                'Ã¡': 'á',  # UTF-8 encoded as Latin1
                'Ã©': 'é',
                'Ã­': 'í',
                'Ã³': 'ó',
                'Ãº': 'ú',
                'Ã±': 'ñ',
                '\u00fd': 'á',  # Unicode 'ý'
                '\u00e1': 'á',  # Unicode 'á'
                '\u00e9': 'é',  # Unicode 'é'
                '\u00ed': 'í',  # Unicode 'í'
                '\u00f3': 'ó',  # Unicode 'ó'
                '\u00fa': 'ú',  # Unicode 'ú'
                '\u00f1': 'ñ',  # Unicode 'ñ'
            }
            
            # First try to normalize the entire string
            result = text
            for old, new in char_map.items():
                result = result.replace(old, new)
            
            # Special handling for known names
            name_map = {
                'Graciela Guzmyn': 'Graciela Guzmán',
                'Edgar Gonzalez Jr.': 'Edgar González Jr.',
                'Lilian Jimenez': 'Lilian Jiménez',
                'Airy M. Ortiz': 'Airy M. Ortíz'
            }
            
            # Try to match against known names (ignoring accents)
            import unicodedata
            def strip_accents(s):
                return ''.join(c for c in unicodedata.normalize('NFD', s)
                             if unicodedata.category(c) != 'Mn')
            
            normalized_result = strip_accents(result)
            for known_name, correct_name in name_map.items():
                if strip_accents(known_name) == normalized_result:
                    result = correct_name
                    break
            
            if any(x in result for x in ['Guzm', 'Gonz', 'Jim', 'Ort']):
                logger.debug(f"After normalization: {result}")
            
            return result
        except Exception as e:
            logger.warning(f"Error normalizing text: {e}")
            return text
    
    def scrape_member_ids(self, chamber: str) -> Tuple[Dict[str, int], Dict[str, str]]:
        """Scrape member IDs from a chamber's landing page.

        Every link whose ``href`` contains ``MemberID=`` contributes one
        entry, keyed by the normalized link text.

        Args:
            chamber: URL path segment of the chamber page; callers in this
                file pass ``"senate"`` or ``"house"``.

        Returns:
            A ``(member_ids, raw_names)`` pair: ``member_ids`` maps the
            normalized name to the numeric member ID and ``raw_names`` maps
            the same normalized name to the link text as it appeared on the
            page. Both are empty when the page cannot be fetched or parsed.
        """
        url = f"https://ilga.gov/{chamber}/default.asp"
        member_ids: Dict[str, int] = {}
        raw_names: Dict[str, str] = {}  # Raw names, used when logging matches

        try:
            # Bounded wait so one stalled response cannot hang the run
            response = self.session.get(url, timeout=30)
            response.raise_for_status()
            response.encoding = 'utf-8'

            logger.debug(f"Fetching member IDs from {url}")

            soup = BeautifulSoup(response.text, 'html.parser')

            for link in soup.find_all('a', href=True):
                href = link['href']
                if 'MemberID=' not in href:
                    continue

                # One malformed link must not discard every mapping already
                # collected, so it is skipped instead of aborting the scrape.
                id_text = href.split('MemberID=')[1].split('&')[0]
                try:
                    member_id = int(id_text)
                except ValueError:
                    logger.debug(f"Skipping link with non-numeric MemberID: {href}")
                    continue

                # Links without text would be stored under the empty name,
                # which carries no information for name matching.
                name = link.text.strip()
                if not name:
                    continue

                normalized_name = self.normalize_text(name)
                member_ids[normalized_name] = member_id
                raw_names[normalized_name] = name

                logger.debug(f"Found member ID mapping - Raw: {name}, Normalized: {normalized_name}, ID: {member_id}")

            return member_ids, raw_names

        except Exception as e:
            logger.error(f"Error scraping member IDs from {url}: {e}")
            import traceback
            logger.error(traceback.format_exc())
            return {}, {}

    def find_best_name_match(self, target_name: str, available_names: Dict[str, int], raw_names: Dict[str, str]) -> Optional[int]:
        """Find the best matching name and return its member ID.

        Names are compared after lower-casing and removing accents,
        punctuation and generational suffixes (jr, sr, ii, iii). Three
        strategies are tried in order, each over all candidates:

        1. the normalized names are equal;
        2. the word set of one name is a subset of the other's;
        3. both names contain the same known distinctive surname fragment.

        Args:
            target_name: Name to look up.
            available_names: Candidate name -> member ID.
            raw_names: Candidate name -> original spelling, used for log
                messages only; a missing entry falls back to the candidate
                name itself.

        Returns:
            The member ID of the first candidate that matches, or None when
            nothing matches or the target has no comparable content.
        """
        import unicodedata

        suffix_tokens = {'jr', 'sr', 'ii', 'iii'}

        def normalize_for_comparison(s: str) -> str:
            """Lower-case and strip accents, punctuation and suffix words."""
            s = ''.join(c for c in unicodedata.normalize('NFKD', s.lower())
                        if not unicodedata.combining(c))
            s = ''.join(c for c in s if c.isalnum() or c.isspace())
            # Suffixes are removed as whole words: substring removal would
            # also eat letters inside ordinary names (e.g. any "ii").
            return ' '.join(tok for tok in s.split() if tok not in suffix_tokens)

        target_normalized = normalize_for_comparison(target_name)
        if not target_normalized:
            # An empty word set is a subset of every set, so an empty target
            # would otherwise "match" the first candidate.
            logger.warning(f"No match found for {target_name}")
            return None
        target_parts = set(target_normalized.split())

        # Normalize each candidate once; empty candidates cannot match.
        candidates = []
        for name, member_id in available_names.items():
            name_normalized = normalize_for_comparison(name)
            if name_normalized:
                candidates.append((name, member_id, name_normalized))

        # First try exact match after normalization
        for name, member_id, name_normalized in candidates:
            if name_normalized == target_normalized:
                logger.debug(f"Found exact normalized match for {target_name}: {raw_names.get(name, name)}")
                return member_id

        # Then try partial matches: all words of one name appear in the other
        for name, member_id, name_normalized in candidates:
            name_parts = set(name_normalized.split())
            if target_parts <= name_parts or name_parts <= target_parts:
                logger.debug(f"Found partial match for {target_name}: {raw_names.get(name, name)}")
                return member_id

        # Finally match on distinctive surname fragments. Accents are already
        # stripped, so only unaccented fragments can ever occur.
        shared_fragments = [
            fragment for fragment in ('guzm', 'gonz', 'jimen', 'ortiz')
            if fragment in target_normalized
        ]
        if shared_fragments:
            for name, member_id, name_normalized in candidates:
                if any(fragment in name_normalized for fragment in shared_fragments):
                    logger.debug(f"Found distinctive part match for {target_name}: {raw_names.get(name, name)}")
                    return member_id

        logger.warning(f"No match found for {target_name}")
        return None

    def get_profile_info(self, member_url: str) -> Dict:
        """Fetch additional information from a member's profile page.

        Each section of the page is extracted independently: a failure in one
        section is logged as a warning and the other sections are still
        attempted.

        Args:
            member_url: Absolute URL of the member's profile page.

        Returns:
            A dict with whichever of these keys could be extracted:
            ``headshot_url``, ``email``, ``years_served``, ``committees``,
            ``biography``, ``district_office_address``,
            ``district_office_phone``, ``district_office_fax``,
            ``associated_legislator_name``, ``associated_legislator_id``.
            Empty when the page cannot be fetched or parsed.
        """
        from urllib.parse import urljoin

        try:
            # Bounded wait so one stalled profile page cannot hang the run
            response = self.session.get(member_url, timeout=30)
            response.raise_for_status()
            response.encoding = 'utf-8'  # Force UTF-8 encoding
            soup = BeautifulSoup(response.text, 'html.parser')
        except Exception as e:
            logger.error(f"Error fetching profile info from {member_url}: {e}")
            return {}

        profile_info = {}

        def find_heading(label):
            # The <b> element whose string contains ``label``, or None
            return soup.find('b', string=lambda s: s and label in s)

        def text_after(heading):
            # Non-empty text of each sibling following ``heading``, up to
            # (not including) the next <b> element.
            parts = []
            node = heading.next_sibling
            while node is not None and not (hasattr(node, 'name') and node.name == 'b'):
                if hasattr(node, 'text') and node.text.strip():
                    parts.append(node.text.strip())
                node = node.next_sibling
            return parts

        def extract_headshot():
            img = soup.find('img', src=lambda x: x and '/images/members/' in x)
            if img:
                # urljoin copes with root-relative, page-relative and already
                # absolute src values; plain concatenation only handled the
                # root-relative form.
                profile_info['headshot_url'] = urljoin(member_url, img['src'])
                logger.debug(f"Found headshot URL for {member_url}")

        def extract_email():
            email_link = soup.find('a', href=lambda x: x and 'mailto:' in x)
            if email_link:
                email = email_link['href'].replace('mailto:', '').strip()
                # Addresses on the ilga.gov domain are deliberately ignored
                if email and 'ilga.gov' not in email:
                    profile_info['email'] = email
                    logger.debug(f"Found email for {member_url}")

        def extract_years_served():
            heading = find_heading('Years served:')
            if heading:
                years = heading.next_sibling
                if years:
                    profile_info['years_served'] = years.strip()
                    logger.debug(f"Found years served for {member_url}")

        def extract_committees():
            heading = find_heading('Committee assignments:')
            if heading:
                committees = text_after(heading)
                if committees:
                    profile_info['committees'] = '; '.join(committees)
                    logger.debug(f"Found committees for {member_url}")

        def extract_biography():
            heading = find_heading('Biography:')
            if heading:
                bio_parts = text_after(heading)
                if bio_parts:
                    profile_info['biography'] = ' '.join(bio_parts)
                    logger.debug(f"Found biography for {member_url}")

        def extract_district_office():
            heading = find_heading('District Office:')
            if not heading:
                return

            # The text node directly after the heading is taken as the address
            address = heading.next_sibling
            if address and isinstance(address, str):
                profile_info['district_office_address'] = address.strip()
                logger.debug(f"Found district office address for {member_url}")

            for sibling in heading.next_siblings:
                if not isinstance(sibling, str):
                    continue
                text = sibling.strip()
                # Test for FAX first: a fax line may also contain a
                # parenthesized area code and would otherwise be stored as
                # the phone number.
                if 'FAX' in text:
                    fax = text.replace('FAX', '').strip(' :')
                    if fax:
                        profile_info['district_office_fax'] = fax
                        logger.debug(f"Found district office fax for {member_url}")
                elif '(' in text and ')' in text:
                    profile_info['district_office_phone'] = text
                    logger.debug(f"Found district office phone for {member_url}")

        def extract_associated_legislator():
            heading = (find_heading('Associated Senator(s):')
                       or find_heading('Associated Representative(s):'))
            if not heading:
                return
            # First member link that follows the heading in the document
            link = heading.find_next('a', href=lambda x: x and 'MemberID=' in x)
            if link:
                profile_info['associated_legislator_name'] = link.text.strip()
                member_id = link['href'].split('MemberID=')[1].split('&')[0]
                profile_info['associated_legislator_id'] = int(member_id)
                logger.debug(f"Found associated legislator for {member_url}")

        # (label used in the warning message, extractor), in page order
        sections = (
            ('headshot URL', extract_headshot),
            ('email', extract_email),
            ('years served', extract_years_served),
            ('committees', extract_committees),
            ('biography', extract_biography),
            ('district office info', extract_district_office),
            ('associated legislator', extract_associated_legislator),
        )
        for label, extract in sections:
            try:
                extract()
            except Exception as e:
                # Best effort: one malformed section must not lose the rest
                logger.warning(f"Error getting {label}: {e}")

        return profile_info

    def get_member_id(self, name: str, chamber_ids: Dict[str, int], raw_names: Dict[str, str], chamber: str) -> Optional[int]:
        """Resolve a member ID: manual correction, exact lookup, then fuzzy match.

        ``chamber`` is only used in the debug log message. Returns None when
        no strategy produces an ID.
        """
        # A manual correction always wins over scraped data
        manual_fixes = self.load_corrections()
        if name in manual_fixes:
            corrected_id = manual_fixes[name]
            logger.info(f"Using manual correction for {name}: {corrected_id}")
            return corrected_id

        # Exact hit on the scraped name -> ID table
        exact_id = chamber_ids.get(name)
        if exact_id is not None:
            return exact_id

        # Fall back to fuzzy matching
        logger.debug(f"No exact match found for {chamber} {name}, trying fuzzy match...")
        return self.find_best_name_match(name, chamber_ids, raw_names)

    def scrape_legislators(self) -> List[Dict]:
        """Scrape both chambers and return the combined list of legislators.

        Senate members come first, then House members. Each record from the
        XML feed gets a ``member_id`` (None when unresolved); when an ID is
        found, ``member_url`` and the profile page fields are added too.
        Members without an ID are passed to ``update_errors_file``.
        """
        # Name -> ID tables for both chambers are fetched before any XML feed
        id_tables = {
            "senate": self.scrape_member_ids("senate"),
            "house": self.scrape_member_ids("house"),
        }
        # (chamber, XML feed URL, label for log messages, profile page path)
        chamber_specs = (
            ("senate", SENATE_MEMBERS_URL, "senator", "senate/Senator.asp"),
            ("house", HOUSE_MEMBERS_URL, "representative", "house/Rep.asp"),
        )

        collected = []
        unresolved = []

        for chamber, feed_url, log_label, profile_path in chamber_specs:
            xml_text = self.fetch_xml(feed_url)
            if not xml_text:
                continue

            ids_by_name, raw_by_name = id_tables[chamber]
            members = self.parse_legislator_xml(xml_text, chamber)
            for member in members:
                name = member['name']
                member_id = self.get_member_id(name, ids_by_name, raw_by_name, log_label)

                if member_id is None:
                    unresolved.append({
                        "name": name,
                        "chamber": chamber,
                        "error": "No member ID found",
                        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
                    })

                member['member_id'] = member_id
                if not member_id:
                    continue

                member_url = f"https://ilga.gov/{profile_path}?GA={self.current_session}&MemberID={member_id}"
                member['member_url'] = member_url
                member.update(self.get_profile_info(member_url))
            collected.extend(members)

        # Update errors file
        self.update_errors_file(unresolved)

        return collected
    
    def update_database(self, legislators: List[Dict]):
        """Replace the current session's rows in ``legislators`` with *legislators*.

        Deletes every row whose ``session`` equals ``self.current_session``,
        inserts one row per entry of *legislators* and commits once at the
        end. Returns silently when no database connection is available.
        ``mysql.connector`` errors are logged together with a traceback and
        are not re-raised.

        Args:
            legislators: Scraped legislator records keyed by column name.
                For text columns a missing key or a ``None`` value is stored
                as ``''``; ``member_id`` and ``associated_legislator_id`` are
                passed through as-is (``None`` when absent). ``session`` is
                always taken from ``self.current_session``.
        """
        connection = self.get_db_connection()
        if not connection:
            return

        # Column order matches the INSERT statement below and determines the
        # order of the debug output.
        columns = (
            'name', 'district', 'party', 'chamber', 'email', 'phone', 'website',
            'session', 'address_line1', 'address_line2', 'city', 'state',
            'zip_code', 'member_id', 'member_url', 'headshot_url', 'committees',
            'years_served', 'district_office_address', 'district_office_phone',
            'district_office_fax', 'biography', 'associated_legislator_name',
            'associated_legislator_id',
        )
        # ID columns keep None instead of being coerced to ''.
        nullable_columns = ('member_id', 'associated_legislator_id')

        # Pre-bind so the finally block is safe even if cursor() itself fails.
        cursor = None
        try:
            cursor = connection.cursor()

            # Delete only legislators from the current session
            cursor.execute("DELETE FROM legislators WHERE session = %s", (self.current_session,))

            for legislator in legislators:
                # Ensure all text fields have at least an empty string instead
                # of None. dict.get(key, '') alone is not enough: it returns
                # None when the key exists with a None value.
                legislator_data = {}
                for column in columns:
                    if column == 'session':
                        value = self.current_session
                    else:
                        value = legislator.get(column)
                        if value is None and column not in nullable_columns:
                            value = ''
                    legislator_data[column] = value

                # Log the data for problematic legislators
                if any(x in legislator_data['name'] for x in ['Guzm', 'Gonz', 'Jim', 'Ort']):
                    logger.debug(f"Inserting data for {legislator_data['name']}:")
                    for key, value in legislator_data.items():
                        logger.debug(f"  {key}: {value}")

                cursor.execute('''
                INSERT INTO legislators (
                    name, district, party, chamber, email, phone, website, session,
                    address_line1, address_line2, city, state, zip_code, member_id, member_url, headshot_url,
                    committees, years_served, district_office_address, district_office_phone, district_office_fax,
                    biography, associated_legislator_name, associated_legislator_id
                )
                VALUES (
                    %(name)s, %(district)s, %(party)s, %(chamber)s, %(email)s, %(phone)s, %(website)s, %(session)s,
                    %(address_line1)s, %(address_line2)s, %(city)s, %(state)s, %(zip_code)s, %(member_id)s, 
                    %(member_url)s, %(headshot_url)s, %(committees)s, %(years_served)s, %(district_office_address)s,
                    %(district_office_phone)s, %(district_office_fax)s, %(biography)s, %(associated_legislator_name)s,
                    %(associated_legislator_id)s
                )
                ''', legislator_data)

            connection.commit()
            logger.info(f"Updated database with {len(legislators)} legislators for session {self.current_session}")
        except Error as e:
            logger.error(f"Error updating database: {e}")
            # Log the full error details
            import traceback
            logger.error(traceback.format_exc())
        finally:
            if connection.is_connected():
                if cursor is not None:
                    cursor.close()
                connection.close()

def main():
    """Command-line entry point for the scraper.

    With ``--test`` among the command-line arguments only
    ``ILGAScraper.test_connection`` is run. Otherwise all legislators are
    scraped and written to the database.
    """
    # NOTE(review): logging.basicConfig at module import time already creates
    # a FileHandler for 'logs/scraper.log', so this call presumably runs too
    # late to create the directory for that handler on a first run - confirm.
    os.makedirs('logs', exist_ok=True)

    scraper = ILGAScraper()

    test_mode = '--test' in sys.argv
    if not test_mode:
        scraper.update_database(scraper.scrape_legislators())
        return

    logger.info("Running in test mode...")
    scraper.test_connection()

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()