/**
 * KareeRise Job Extension - Extraction Helpers
 * 
 * This module contains functions for extracting job data from various job boards.
 */

/**
 * Convert an HTML fragment into whitespace-normalised plain text.
 *
 * All runs of whitespace (including newlines) are collapsed to a single
 * space, so the result is always a single line.
 *
 * @param {string} htmlContent - HTML markup to convert.
 * @returns {string} Text with tags removed and whitespace collapsed;
 *     '' when the input is empty or not a string.
 */
function cleanHtmlToText(htmlContent) {
    if (!htmlContent || typeof htmlContent !== 'string') {
        return '';
    }

    try {
        // Parse into a separate document instead of assigning innerHTML on an
        // element created from the live page: DOMParser marks scripts as
        // non-executable and keeps the markup detached from the current page.
        const parsed = new DOMParser().parseFromString(htmlContent, 'text/html');

        // Drop elements whose text is code or styling rather than content.
        parsed.querySelectorAll('script, style, noscript').forEach(el => el.remove());

        const text = (parsed.body && parsed.body.textContent) || '';

        // One pass is enough: \s also matches newlines, so nothing is left
        // for a separate "excessive line breaks" step to act on.
        return text.replace(/\s+/g, ' ').trim();
    } catch (error) {
        console.error('Error cleaning HTML:', error);

        // Fallback when no DOM parser is available: strip tags with regexes.
        return htmlContent
            .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '')
            .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '')
            .replace(/<[^>]*>/g, '')
            .replace(/\s+/g, ' ')
            .trim();
    }
}

/**
 * Guess the country the current page targets.
 *
 * The page hostname is checked first, then the browser locale. Matching is
 * done on whole hostname labels (the parts between dots) rather than on
 * substrings of the full URL, so hosts such as "academy.com" or
 * "msg.example.com" are not mistaken for ".my" / "sg." sites.
 *
 * @returns {string} "Singapore", "Malaysia", "Canada", or "Global" when
 *     neither the hostname nor the locale identifies a country.
 */
function detectCountry() {
    const lang = navigator.language || navigator.userLanguage || "en-US";
    const labels = (window.location.hostname || "").toLowerCase().split(".");
    const tld = labels[labels.length - 1];

    // Hostname first. "sg" / "my" count both as a TLD (jobstreet.com.sg) and
    // as a subdomain label (sg.indeed.com); "ca" is only accepted as a TLD.
    if (labels.includes("sg")) {
        return "Singapore";
    }
    if (labels.includes("my")) {
        return "Malaysia";
    }
    if (tld === "ca") {
        return "Canada";
    }

    // Then the browser locale.
    if (lang.includes("en-SG") || lang.includes("zh-SG")) {
        return "Singapore";
    }
    if (lang.includes("en-MY") || lang.includes("ms-MY")) {
        return "Malaysia";
    }
    if (lang.includes("en-CA") || lang.includes("fr-CA")) {
        return "Canada";
    }

    return "Global";
}

/**
 * Identify which job board the current page belongs to.
 *
 * The lower-cased page URL is searched for each board's domain fragment in
 * a fixed order; the first board with a matching fragment wins.
 *
 * @returns {string} Board identifier, or 'company' when no known board matches.
 */
function detectJobSource() {
    const currentUrl = window.location.href.toLowerCase();

    // Ordered list of [board identifier, URL fragments that identify it].
    const knownBoards = [
        ['linkedin', ['linkedin.com']],
        ['indeed', ['indeed.com', 'indeed.co']],
        ['glassdoor', ['glassdoor.com']],
        ['monster', ['monster.com']],
        ['ziprecruiter', ['ziprecruiter.com']],
        ['jobstreet', ['jobstreet.com']],
        ['jobsdb', ['jobsdb.com']],
    ];

    const match = knownBoards.find(([, fragments]) =>
        fragments.some(fragment => currentUrl.includes(fragment))
    );

    // Anything that is not a known board is treated as a company website.
    return match ? match[0] : 'company';
}

/**
 * Extract job data from a LinkedIn job page using known DOM selectors.
 *
 * Each field is looked up through an ordered list of selectors; the first
 * selector whose element has non-empty text wins. The company name has two
 * extra fallbacks: the page title, then a scan of links/spans/divs that look
 * like company references.
 *
 * @returns {{title: string, company: string, location: string,
 *     description: string, jobUrl: string, source: string}} Extracted job
 *     data. If extraction throws, a minimal object built from the page title
 *     and the first 5000 characters of the body text is returned instead.
 */
function extractLinkedInJobData() {
    console.log("🔍 Extracting LinkedIn job data...");

    try {
        // Trimmed text of the first selector that yields non-empty text.
        // An element that exists but is empty does not stop the search.
        const firstNonEmptyText = (selectors, label) => {
            for (const selector of selectors) {
                const text = document.querySelector(selector)?.textContent?.trim();
                if (text) {
                    console.log(`Found ${label} with selector ${selector}: ${text}`);
                    return text;
                }
            }
            return "";
        };

        // Job title - multiple possible selectors
        const title = firstNonEmptyText([
            'h1.job-title',
            'h1.t-24',
            'h1.topcard__title',
            'h1[data-test-job-title]',
            'h1.job-details-jobs-unified-top-card__job-title'
        ], 'job title');

        // Company name - multiple possible selectors
        let company = firstNonEmptyText([
            'span.topcard__flavor.topcard__flavor--bullet:first-child',
            'a[data-test-company-name]',
            'a.topcard__org-name-link',
            'span.jobs-unified-top-card__company-name',
            'span.jobs-unified-top-card__subtitle-primary-grouping span:first-child',
            'div.job-details-jobs-unified-top-card__primary-description-without-tagline a',
            'a.job-details-jobs-unified-top-card__company-name',
            'span.job-details-jobs-unified-top-card__company-name',
            '.job-details-jobs-unified-top-card__primary-description-without-tagline a',
            '.job-details-jobs-unified-top-card__company-name',
            'a[data-tracking-control-name="public_jobs_topcard-org-name"]',
            '.jobs-unified-top-card__company-name a',
            '.jobs-unified-top-card__company-name span',
            'h3.job-details-jobs-unified-top-card__company-name',
            'h4.job-details-jobs-unified-top-card__company-name'
        ], 'company');

        // Fallback: take the second " - " separated part of the page title
        // (titles often look like "Job Title - Company Name - LinkedIn").
        if (!company && document.title) {
            const titleParts = document.title.split(' - ');
            if (titleParts.length > 1) {
                company = titleParts[1].replace(' | LinkedIn', '').replace(' at ', '').trim();
                console.log(`Found company via title fallback: ${company}`);
            }
        }

        // Last resort: scan for any short element that looks like a company
        // reference. The length guard applies to every heuristic, so a large
        // container whose class merely contains "company" is not picked up.
        if (!company) {
            for (const element of document.querySelectorAll('a, span, div')) {
                const text = (element.textContent || '').trim();
                if (text.length <= 2 || text.length >= 50) {
                    continue;
                }

                const looksLikeCompany =
                    element.getAttribute('href')?.includes('/company/') ||
                    (typeof element.className === 'string' && element.className.includes('company')) ||
                    element.getAttribute('data-tracking-control-name')?.includes('org-name');

                if (looksLikeCompany) {
                    company = text;
                    console.log(`Found company via fallback search: ${company}`);
                    break;
                }
            }
        }

        // Location - multiple possible selectors
        const location = firstNonEmptyText([
            'span.topcard__flavor.topcard__flavor--bullet:nth-child(2)',
            'span[data-test-job-location]',
            'span.jobs-unified-top-card__bullet',
            'span.jobs-unified-top-card__subtitle-primary-grouping span:nth-child(2)',
            'div.job-details-jobs-unified-top-card__primary-description-without-tagline span.job-details-jobs-unified-top-card__bullet'
        ], 'location');

        // Description - multiple possible selectors; the element's HTML is
        // converted to plain text before being returned.
        let description = "";
        const descriptionSelectors = [
            'div.description__text',
            'div.show-more-less-html__markup',
            'div[data-test-job-description]',
            'div.jobs-description-content',
            'div.jobs-box__html-content'
        ];

        for (const selector of descriptionSelectors) {
            const element = document.querySelector(selector);
            if (!element) {
                continue;
            }
            const text = cleanHtmlToText(element.innerHTML.trim());
            if (text) {
                description = text;
                console.log(`Found description with selector ${selector}: ${description.substring(0, 100)}...`);
                break;
            }
        }

        return {
            title,
            company,
            location,
            description,
            jobUrl: window.location.href,
            source: 'linkedin'
        };
    } catch (error) {
        console.error("Error extracting LinkedIn job data:", error);

        // Return basic fallback data
        return {
            title: document.title.replace(" | LinkedIn", ""),
            company: "",
            location: "",
            description: (document.body?.innerText || '').substring(0, 5000),
            jobUrl: window.location.href,
            source: 'linkedin'
        };
    }
}

/**
 * Extract job data from an Indeed job page using known DOM selectors.
 *
 * For every field the selectors are tried in order and the first element
 * that exists is used (even if its text is empty). Indeed's markup differs
 * between site versions, hence several selectors per field.
 *
 * @returns {{title: string, company: string, location: string,
 *     description: string, jobUrl: string, source: string}} Extracted job
 *     data, or a minimal fallback object if extraction throws.
 */
function extractIndeedJobData() {
    console.log("🔍 Extracting Indeed job data...");

    try {
        // First element matched by any of the selectors, or null.
        const firstMatch = (candidates) => {
            for (const candidate of candidates) {
                const node = document.querySelector(candidate);
                if (node) {
                    return node;
                }
            }
            return null;
        };

        // Trimmed text of a node, or "" when there is no node.
        const trimmedText = (node) => (node ? node.textContent.trim() : "");

        const title = trimmedText(firstMatch([
            'h1.jobsearch-JobInfoHeader-title',
            'h1[data-testid="jobTitle"]',
            'h1.icl-u-xs-mb--xs'
        ]));

        const company = trimmedText(firstMatch([
            'div[data-testid="inlineHeader-companyName"]',
            'div.jobsearch-InlineCompanyRating > div:first-child',
            'div.icl-u-lg-mr--sm'
        ]));

        const location = trimmedText(firstMatch([
            'div[data-testid="inlineHeader-companyLocation"]',
            'div.jobsearch-JobInfoHeader-subtitle > div:nth-child(2)',
            'div.icl-u-xs-mt--xs'
        ]));

        // The description keeps its structure as HTML on the page, so it is
        // converted to plain text rather than read via textContent.
        const descriptionNode = firstMatch([
            'div[data-testid="jobDescriptionText"]',
            'div#jobDescriptionText',
            'div.jobsearch-jobDescriptionText'
        ]);
        const description = descriptionNode
            ? cleanHtmlToText(descriptionNode.innerHTML.trim())
            : "";

        return {
            title,
            company,
            location,
            description,
            jobUrl: window.location.href,
            source: 'indeed'
        };
    } catch (error) {
        console.error("Error extracting Indeed job data:", error);

        // Minimal data derived from the page title and raw body text.
        return {
            title: document.title.replace(" | Indeed.com", ""),
            company: "",
            location: "",
            description: document.body.innerText.substring(0, 5000),
            jobUrl: window.location.href,
            source: 'indeed'
        };
    }
}

/**
 * Best-effort job extraction for pages without site-specific selectors.
 *
 * - title: first h1/h2 whose trimmed text is 6-99 characters long, else the
 *   document title.
 * - company: the og:site_name meta content, else the alt text of a "logo"
 *   image with the word "logo" removed.
 * - description: plain text of <main> or <article>, else of the first
 *   10000 characters of the body HTML.
 * - location: the detected country, used as a coarse stand-in.
 *
 * @returns {{title: string, company: string, location: string,
 *     description: string, jobUrl: string, source: string}} Extracted job
 *     data, or a minimal fallback object if extraction throws.
 */
function extractGenericJobData() {
    console.log("🔍 Extracting generic job data...");

    try {
        // Look for title in common heading elements. Both length bounds are
        // checked on the trimmed text so surrounding whitespace in the markup
        // does not disqualify an otherwise valid heading.
        let title = "";
        for (const heading of document.querySelectorAll('h1, h2')) {
            const text = (heading.textContent || '').trim();
            if (text.length > 5 && text.length < 100) {
                title = text;
                break;
            }
        }

        // If no title found, use document title
        if (!title) {
            title = document.title;
        }

        // For company, prefer metadata. A meta tag without usable content
        // falls through to the logo heuristic instead of yielding null.
        const metaCompany = document.querySelector('meta[property="og:site_name"]');
        let company = (metaCompany?.getAttribute('content') || '').trim();
        if (!company) {
            // The "i" flag makes the attribute match case-insensitive, so
            // alt="Acme Logo" is found as well as alt="Acme logo".
            const logo = document.querySelector('img[alt*="logo" i]');
            if (logo && logo.alt) {
                company = logo.alt.replace(/\s*logo\s*/gi, ' ').trim();
            }
        }

        // Extract main page content for description
        let description = "";
        const mainContent = document.querySelector('main') || document.querySelector('article');
        if (mainContent) {
            description = cleanHtmlToText(mainContent.innerHTML);
        } else {
            // Fallback: the leading part of the body markup
            description = cleanHtmlToText((document.body?.innerHTML || '').substring(0, 10000));
        }

        return {
            title,
            company,
            location: detectCountry(), // Use detected country as fallback
            description,
            jobUrl: window.location.href,
            source: detectJobSource()
        };
    } catch (error) {
        console.error("Error extracting generic job data:", error);

        // Basic fallback
        return {
            title: document.title,
            company: window.location.hostname.replace('www.', ''),
            location: detectCountry(),
            description: (document.body?.innerText || '').substring(0, 5000),
            jobUrl: window.location.href,
            source: 'other'
        };
    }
}

/**
 * Safely test whether an element's class contains `className`.
 *
 * `element.className` is not always a string (SVG elements expose an object
 * with a `baseVal` property), so several representations are handled:
 *   1. string className      -> substring test
 *   2. object with `baseVal` -> substring test on baseVal
 *   3. `classList.contains`  -> exact class-token test
 *   4. anything else         -> substring test on its string form
 *
 * @param {object} element - Element (or element-like object) to inspect.
 * @param {string} className - Class name (or fragment) to look for.
 * @returns {boolean} true when a match is found; false for a missing
 *     element, an empty className, or when stringifying className fails.
 */
function hasClassName(element, className) {
    const classValue = element ? element.className : undefined;
    if (!classValue) {
        return false;
    }

    if (typeof classValue === "string") {
        return classValue.includes(className);
    }

    if (classValue.baseVal !== undefined) {
        return classValue.baseVal.includes(className);
    }

    if (element.classList?.contains) {
        return element.classList.contains(className);
    }

    // Last resort: whatever the value stringifies to.
    try {
        return classValue.toString().includes(className);
    } catch {
        return false;
    }
}

/**
 * Report whether the user agent string contains the "Edg" token.
 *
 * @returns {boolean} true when navigator.userAgent contains "Edg".
 */
function isEdgeBrowser() {
    const { userAgent } = navigator;
    return userAgent.includes("Edg");
}

/**
 * Identify the job board a URL belongs to.
 *
 * The lower-cased URL is searched for each board's fragment in order; the
 * first match wins. The list covers the boards recognised by
 * detectJobSource() (including regional "indeed.co.*" domains, JobStreet and
 * JobsDB) so both detectors agree on those sites.
 *
 * @param {string} url - URL to classify.
 * @returns {string} Board identifier, or 'company' when no known board
 *     matches or `url` is not a string.
 */
function detectSource(url) {
    // Nothing to classify: treat like an unknown site instead of throwing.
    if (typeof url !== 'string') {
        return 'company';
    }

    const urlLower = url.toLowerCase();

    // Ordered [URL fragment, board identifier] pairs.
    const boards = [
        ['linkedin.com', 'linkedin'],
        ['indeed.com', 'indeed'],
        ['indeed.co', 'indeed'],
        ['glassdoor', 'glassdoor'],
        ['ziprecruiter.com', 'ziprecruiter'],
        ['monster.com', 'monster'],
        ['dice.com', 'dice'],
        ['simplyhired.com', 'simplyhired'],
        ['careerbuilder.com', 'careerbuilder'],
        ['jobstreet.com', 'jobstreet'],
        ['jobsdb.com', 'jobsdb'],
    ];

    for (const [fragment, source] of boards) {
        if (urlLower.includes(fragment)) {
            return source;
        }
    }

    // If not a known job board, it's likely a company website
    return 'company';
}

/**
 * Collect the current page's job-relevant HTML for AI processing.
 *
 * Delegates the extraction to extractOptimizedHTMLContent(). If that throws,
 * a small placeholder document containing the escaped page title and URL is
 * returned so callers always receive the same object shape.
 *
 * @returns {{title: string, url: string, source: string, fullHTML: string}}
 *     Page title, URL, detected job board and the extracted HTML.
 */
function extractPageHTML() {
    console.log("🔍 Extracting optimized page HTML for AI processing...");

    const url = window.location.href;
    const source = detectSource(url);

    console.log(`🔍 Detected source: ${source} from URL: ${url}`);

    try {
        // Use optimized extraction with site-specific selectors
        const fullHTML = extractOptimizedHTMLContent();

        console.log(`✅ Optimized HTML extraction complete: ${fullHTML.length} characters`);

        return {
            title: document.title,
            url: url,
            source: source,
            fullHTML: fullHTML
        };
    } catch (error) {
        console.error("❌ Error extracting page HTML:", error);

        // Title and URL are page-controlled text; escape them so they cannot
        // change the structure of the placeholder markup.
        const escapeHTML = (value) => String(value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;');

        // Minimal fallback
        return {
            title: document.title,
            url: url,
            source: source,
            fullHTML: `<div>
                <h1>${escapeHTML(document.title)}</h1>
                <p>Error extracting content. URL: ${escapeHTML(url)}</p>
            </div>`
        };
    }
}

// Note: Site-specific functions removed - now using universal visibility filtering

/**
 * Decide whether an element is rendered with a non-empty box.
 *
 * An element is rejected when its computed style hides it
 * (display: none, visibility: hidden, opacity: 0) or when its bounding box
 * has zero width or height. Being outside the viewport does NOT reject the
 * element; that case is only logged.
 *
 * @param {Element} element - Element to test.
 * @returns {boolean} true when the element passes the style and size checks.
 */
function isElementActuallyVisible(element) {
    if (!element) {
        return false;
    }

    // Style-based hiding
    const computed = window.getComputedStyle(element);
    const hiddenByStyle =
        computed.display === 'none' ||
        computed.visibility === 'hidden' ||
        computed.opacity === '0';
    if (hiddenByStyle) {
        return false;
    }

    // Collapsed box
    const box = element.getBoundingClientRect();
    if (box.width === 0 || box.height === 0) {
        return false;
    }

    // Partial overlap with the viewport on both axes
    const overlapsVertically = box.top < window.innerHeight && box.bottom > 0;
    const overlapsHorizontally = box.left < window.innerWidth && box.right > 0;

    if (!(overlapsVertically && overlapsHorizontally)) {
        // The element may sit further down the page or inside a scrollable
        // container, so it is still accepted.
        console.log('⚠️ Element not in viewport, but keeping it');
    }

    return true;
}

/**
 * Build a compact HTML document containing the visible, job-relevant parts
 * of the current page.
 *
 * Steps:
 *   1. Collect the inner HTML of visible elements matching known job-board
 *      selectors, skipping elements that were already captured (or that sit
 *      inside a captured element) so overlapping selectors do not repeat
 *      the same markup.
 *   2. If that yields little content, add visible headings, the main content
 *      area and, as a last resort, visible text blocks.
 *   3. Strip scripts, styles, iframes and comments, collapse whitespace and
 *      cap the result at 25000 characters.
 *   4. Wrap everything in a small HTML document with a metadata comment.
 *
 * Page-controlled strings (title, URL, hostname, selectors, raw text) are
 * escaped before being placed into the generated markup.
 *
 * @returns {string} The generated HTML document.
 */
function extractOptimizedHTMLContent() {
    console.log('🔍 Starting optimized HTML content extraction (visible elements only)...');

    // Escape text for use in HTML text nodes and double-quoted attributes.
    const escapeHTML = (value) => String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;');

    // Universal job content selectors - will be filtered for visibility
    const jobContentSelectors = [
        // LinkedIn selectors
        '.jobs-search__job-details',
        '.job-details',
        '.jobs-details',
        '[data-job-id]',
        '.job-view-layout',
        '.jobs-unified-top-card',
        '.job-details-jobs-unified-top-card',
        '.jobs-box__html-content',

        // Indeed selectors
        '.jobsearch-SerpJobCard',
        '.jobsearch-JobComponent',
        '#jobDescriptionText',
        '.jobsearch-jobDescriptionText',
        '.jobsearch-JobComponent-description',

        // Glassdoor selectors
        '.jobDescriptionContent',
        '.jobDescContainer',
        '.empReviews',
        '.jobListing',
        '[data-test="jobDescription"]',
        '.jobDescription',

        // ZipRecruiter selectors
        '.job_content',
        '.job-description',
        '.job_description',
        '.job-summary',

        // Generic job-related selectors
        '[class*="job"]',
        '[class*="description"]',
        '[class*="posting"]',
        '[class*="details"]',
        '[id*="job"]',
        '[id*="description"]',
        'main',
        'article',
        '[role="main"]'
    ];

    let relevantContent = '';
    let contentFound = false;

    // Elements whose markup is already part of relevantContent.
    const capturedElements = [];
    const isAlreadyCaptured = (element) =>
        capturedElements.some(captured => captured === element || captured.contains(element));

    // Try to find job-specific content, but ONLY if it's visible
    for (const selector of jobContentSelectors) {
        const elements = document.querySelectorAll(selector);
        if (elements.length > 0) {
            console.log(`🔍 Checking ${elements.length} elements with selector: ${selector}`);

            elements.forEach((element, index) => {
                // Its markup is already included, directly or via an ancestor.
                if (isAlreadyCaptured(element)) {
                    return;
                }

                // Only include if element is actually visible
                if (isElementActuallyVisible(element)) {
                    const html = element.innerHTML;
                    if (html && html.length > 100) {
                        console.log(`✅ Found visible content with selector: ${selector} (index ${index})`);
                        // Selectors may contain double quotes, so escape them
                        // before using them as an attribute value.
                        relevantContent += `<div class="extracted-visible-section" data-selector="${escapeHTML(selector)}" data-index="${index}">${html}</div>\n`;
                        capturedElements.push(element);
                        contentFound = true;
                    }
                } else {
                    console.log(`⚪ Skipping hidden element with selector: ${selector} (index ${index})`);
                }
            });

            // If we found substantial visible content, we can stop
            if (relevantContent.length > 8000) {
                console.log(`✅ Found sufficient visible content (${relevantContent.length} chars), stopping search`);
                break;
            }
        }
    }

    // If no specific job content found, get visible page essentials
    if (!contentFound || relevantContent.length < 1000) {
        console.log('⚠️ No specific visible job content found, extracting visible page essentials...');

        // Get visible title and headers
        const titles = document.querySelectorAll('h1, h2, h3, h4, .title, [class*="title"], [class*="heading"]');
        titles.forEach((title, index) => {
            if (isElementActuallyVisible(title) && title.textContent && title.textContent.trim().length > 0) {
                console.log(`✅ Found visible title: ${title.textContent.substring(0, 50)}...`);
                relevantContent += `<div class="visible-title-section" data-index="${index}">${title.outerHTML}</div>\n`;
            }
        });

        // Get visible main content area
        const mainCandidates = [
            document.querySelector('main'),
            document.querySelector('#main'),
            document.querySelector('.main-content'),
            document.querySelector('[role="main"]')
        ];

        for (const mainContent of mainCandidates) {
            if (mainContent && !capturedElements.includes(mainContent) && isElementActuallyVisible(mainContent)) {
                console.log(`✅ Found visible main content area`);
                const mainHTML = mainContent.innerHTML;
                const truncatedMain = mainHTML.length > 20000 ? mainHTML.substring(0, 20000) : mainHTML;
                relevantContent += `<div class="visible-main-content">${truncatedMain}</div>\n`;
                break;
            }
        }

        // Get any visible text content if still too small
        if (relevantContent.length < 2000) {
            console.log('⚠️ Still need more content, extracting visible body text...');
            const textElements = document.querySelectorAll('p, div, span, section, article');
            let visibleText = '';

            // querySelectorAll returns elements in document order, so the
            // descendants of an element directly follow it. Remembering the
            // last element we took text from is enough to skip its
            // descendants and avoid repeating the same text per nesting level.
            let lastIncluded = null;

            textElements.forEach(element => {
                if (lastIncluded && lastIncluded.contains(element)) {
                    return;
                }
                if (isElementActuallyVisible(element) && element.textContent && element.textContent.trim().length > 50) {
                    visibleText += element.textContent.trim() + ' ';
                    lastIncluded = element;
                }
            });

            if (visibleText.length > 0) {
                const truncatedText = visibleText.length > 15000 ? visibleText.substring(0, 15000) : visibleText;
                // This is plain text, not markup: escape it before embedding.
                relevantContent += `<div class="visible-page-text">${escapeHTML(truncatedText).replace(/\n/g, '<br>')}</div>\n`;
            }
        }
    }

    // Clean up and optimize the content
    let optimizedHTML = relevantContent
        .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '') // Remove scripts
        .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '') // Remove styles
        .replace(/<iframe[^>]*>[\s\S]*?<\/iframe>/gi, '') // Remove iframes
        .replace(/<!--[\s\S]*?-->/g, '') // Remove comments
        .replace(/\s+/g, ' ') // Normalize whitespace
        .trim();

    // Final size check and truncation
    const maxSize = 25000; // 25KB limit for better performance
    if (optimizedHTML.length > maxSize) {
        console.log(`⚠️ Truncating HTML from ${optimizedHTML.length} to ${maxSize} chars`);
        optimizedHTML = optimizedHTML.substring(0, maxSize) + '\n<!-- [TRUNCATED BY EXTRACTION HELPER] -->';
    }

    const originalLength = document.body ? document.body.innerHTML.length : 0;

    // Add metadata for AI context. "<" and ">" are written as JSON unicode
    // escapes so a title or URL containing "-->" cannot end the surrounding
    // HTML comment early; JSON.parse still yields the original strings.
    const metadata = JSON.stringify({
        title: document.title,
        url: window.location.href,
        domain: window.location.hostname,
        timestamp: new Date().toISOString(),
        extractionMethod: 'visible-only',
        originalLength: originalLength,
        optimizedLength: optimizedHTML.length
    })
        .replace(/</g, '\\u003c')
        .replace(/>/g, '\\u003e');

    // Create structured HTML response
    const structuredHTML = `<!-- METADATA: ${metadata} -->
<html>
<head>
    <title>${escapeHTML(document.title || 'Job Posting')}</title>
    <meta name="url" content="${escapeHTML(window.location.href)}">
    <meta name="domain" content="${escapeHTML(window.location.hostname)}">
</head>
<body>
    ${optimizedHTML}
</body>
</html>`;

    console.log(`📄 Visible-only extraction complete: ${structuredHTML.length} characters (was ${originalLength})`);

    return structuredHTML;
}

/**
 * Strip non-content markup from an HTML string and compact its whitespace.
 *
 * Removes <script>, <style> and <iframe> elements (with their contents) and
 * HTML comments, then collapses every whitespace run to a single space and
 * trims the ends.
 *
 * @param {string} content - HTML markup to clean.
 * @returns {string} The cleaned markup.
 */
function cleanAndOptimizeHTML(content) {
    // Applied in this order; each pattern removes one kind of noise.
    const noisePatterns = [
        /<script[^>]*>[\s\S]*?<\/script>/gi, // scripts
        /<style[^>]*>[\s\S]*?<\/style>/gi,   // styles
        /<iframe[^>]*>[\s\S]*?<\/iframe>/gi, // iframes
        /<!--[\s\S]*?-->/g,                  // comments
    ];

    let cleaned = content;
    for (const pattern of noisePatterns) {
        cleaned = cleaned.replace(pattern, '');
    }

    const compacted = cleaned.replace(/\s+/g, ' ');
    return compacted.trim();
}

// Note: createStructuredHTML function removed - integrated into main function

// Export helpers for use in content.js
// The helpers are exposed as one namespace object on `window`.
// Note: detectSource and cleanAndOptimizeHTML are defined in this file but
// are not part of the exported set.
window.KareeRiseExtractionHelpers = {
    detectCountry,
    detectJobSource,
    extractLinkedInJobData,
    extractIndeedJobData,
    extractGenericJobData,
    hasClassName,
    isEdgeBrowser,
    extractPageHTML,
    extractOptimizedHTMLContent,
    isElementActuallyVisible,
    cleanHtmlToText
};