"""
US Stock Data Enrichment Module
Processes SEC EDGAR data, calculates financial ratios, and builds Claude prompts
"""

import re
import sqlite3
import json
from datetime import datetime
from typing import Dict, Any, Optional, List

from scraper_sec import (
    get_all_metrics, get_company_submissions, get_insider_activity,
    get_historical_metrics, ticker_to_cik, update_company_info
)
from us_error_logger import log_error, log_warning, log_info

# Structural skip rules (R1, R2, R4) - tickers matching these will never produce
# a meaningful analysis from our pipeline (which only reads us-gaap XBRL from
# 10-K/10-Q filings). See README / git history for the analysis behind these.
PREFERRED_SHARE_REGEX = re.compile(r'-P[A-Z]?$')

DB_PATH = '/home/mcmarketshost/public_html/us_stock_analysis.db'


def _safe_float(value):
    """Safely convert to float"""
    if value is None:
        return None
    try:
        f = float(value)
        if f != f:  # NaN check
            return None
        return f
    except (ValueError, TypeError):
        return None


def _safe_divide(numerator, denominator):
    """Safe division returning None if denominator is 0 or None"""
    n = _safe_float(numerator)
    d = _safe_float(denominator)
    if n is None or d is None or d == 0:
        return None
    return n / d


def _format_large_number(value):
    """Format large numbers for display (e.g., 1.5B, 250M)"""
    if value is None:
        return 'N/A'
    v = _safe_float(value)
    if v is None:
        return 'N/A'
    if abs(v) >= 1e12:
        return '{:.1f}T'.format(v / 1e12)
    if abs(v) >= 1e9:
        return '{:.1f}B'.format(v / 1e9)
    if abs(v) >= 1e6:
        return '{:.1f}M'.format(v / 1e6)
    if abs(v) >= 1e3:
        return '{:.1f}K'.format(v / 1e3)
    return '{:.2f}'.format(v)


def calculate_ratios(metrics):
    """
    Calculate financial ratios from SEC metrics

    Args:
        metrics: Dict of metric_name -> {value, end_date, ...}

    Returns:
        Dict of calculated ratios
    """
    ratios = {}

    # Helper to get metric value
    def get_val(name):
        if name in metrics and metrics[name]:
            return _safe_float(metrics[name].get('value'))
        return None

    # Revenue
    revenue = get_val('revenue')
    ratios['revenue'] = revenue

    # Gross Profit & Margin
    gross_profit = get_val('gross_profit')
    ratios['gross_profit'] = gross_profit
    ratios['gross_margin'] = _safe_divide(gross_profit, revenue)
    if ratios['gross_margin']:
        ratios['gross_margin'] *= 100

    # Operating Income & Margin
    operating_income = get_val('operating_income')
    ratios['operating_income'] = operating_income
    ratios['operating_margin'] = _safe_divide(operating_income, revenue)
    if ratios['operating_margin']:
        ratios['operating_margin'] *= 100

    # Net Income & Margin
    net_income = get_val('net_income')
    ratios['net_income'] = net_income
    ratios['net_margin'] = _safe_divide(net_income, revenue)
    if ratios['net_margin']:
        ratios['net_margin'] *= 100

    # EPS
    ratios['eps_basic'] = get_val('eps_basic')
    ratios['eps_diluted'] = get_val('eps_diluted')

    # Balance Sheet Items
    total_assets = get_val('total_assets')
    total_liabilities = get_val('total_liabilities')
    stockholders_equity = get_val('stockholders_equity')
    current_assets = get_val('current_assets')
    current_liabilities = get_val('current_liabilities')
    cash = get_val('cash')
    inventory = get_val('inventory')
    long_term_debt = get_val('long_term_debt')

    ratios['total_assets'] = total_assets
    ratios['total_liabilities'] = total_liabilities
    ratios['stockholders_equity'] = stockholders_equity
    ratios['cash'] = cash
    ratios['long_term_debt'] = long_term_debt

    # Track if equity is negative (important for interpretation)
    ratios['negative_equity'] = stockholders_equity is not None and stockholders_equity < 0

    # ROE (Return on Equity)
    # Note: When equity is negative, ROE becomes misleading
    # Positive income / negative equity = negative ROE (counterintuitive)
    if stockholders_equity is not None and stockholders_equity > 0:
        ratios['roe'] = _safe_divide(net_income, stockholders_equity)
        if ratios['roe']:
            ratios['roe'] *= 100
    else:
        ratios['roe'] = None  # Don't calculate ROE with negative/zero equity

    # ROA (Return on Assets)
    ratios['roa'] = _safe_divide(net_income, total_assets)
    if ratios['roa']:
        ratios['roa'] *= 100

    # Current Ratio
    ratios['current_ratio'] = _safe_divide(current_assets, current_liabilities)

    # Quick Ratio
    quick_assets = None
    if current_assets is not None:
        inv = inventory if inventory else 0
        quick_assets = current_assets - inv
    ratios['quick_ratio'] = _safe_divide(quick_assets, current_liabilities)

    # Debt to Equity - use total liabilities for more comprehensive view
    # When equity is negative, D/E is not meaningful
    if stockholders_equity is not None and stockholders_equity > 0:
        # Use total liabilities as proxy for total debt (more conservative)
        total_debt = long_term_debt if long_term_debt else 0
        ratios['debt_to_equity'] = _safe_divide(total_debt, stockholders_equity)
    else:
        ratios['debt_to_equity'] = None  # Negative equity makes D/E meaningless

    # Debt to Assets
    ratios['debt_to_assets'] = _safe_divide(total_liabilities, total_assets)

    # Interest Coverage Ratio (EBIT / Interest Expense)
    interest_expense = get_val('interest_expense')
    ratios['interest_expense'] = interest_expense
    if operating_income is not None and interest_expense is not None and interest_expense > 0:
        ratios['interest_coverage'] = operating_income / interest_expense
    else:
        ratios['interest_coverage'] = None

    # Cash Flow Items
    operating_cf = get_val('operating_cash_flow')
    capex = get_val('capex')
    dividends = get_val('dividends_paid')
    buybacks = get_val('stock_buybacks')

    ratios['operating_cash_flow'] = operating_cf
    ratios['capex'] = capex
    ratios['dividends_paid'] = dividends
    ratios['stock_buybacks'] = buybacks

    # Free Cash Flow
    if operating_cf is not None:
        capex_val = capex if capex else 0
        ratios['free_cash_flow'] = operating_cf - abs(capex_val)
    else:
        ratios['free_cash_flow'] = None

    # FCF Margin
    ratios['fcf_margin'] = _safe_divide(ratios.get('free_cash_flow'), revenue)
    if ratios['fcf_margin']:
        ratios['fcf_margin'] *= 100

    # Data quality info
    data_date = None
    for m in metrics.values():
        if m and m.get('end_date'):
            if data_date is None or m['end_date'] > data_date:
                data_date = m['end_date']
    ratios['data_as_of'] = data_date

    # Count available metrics
    available = sum(1 for v in ratios.values() if v is not None)
    ratios['metrics_available'] = available
    ratios['data_completeness'] = int((available / len(ratios)) * 100)

    return ratios


def get_growth_rates(cik, metrics):
    """Calculate year-over-year growth rates using annual (FY) data only"""
    growth = {}

    def calc_yoy_growth(metric_name):
        """Calculate YoY growth comparing annual periods only"""
        hist = get_historical_metrics(cik, metric_name, periods=5, annual_only=True)
        if hist.get('status') != 'ok' or not hist.get('values'):
            return None

        values = hist['values']
        if len(values) < 2:
            return None

        # Compare most recent fiscal year to prior fiscal year
        current = _safe_float(values[0].get('value'))
        prior = _safe_float(values[1].get('value'))

        if current is not None and prior is not None and prior != 0:
            return ((current - prior) / abs(prior)) * 100
        return None

    # Calculate growth for each metric using annual data
    growth['revenue_growth'] = calc_yoy_growth('revenue')
    growth['net_income_growth'] = calc_yoy_growth('net_income')
    growth['eps_growth'] = calc_yoy_growth('eps_diluted')

    return growth


def enrich_stock(ticker):
    """
    Full enrichment pipeline for a stock

    Args:
        ticker: Stock ticker symbol

    Returns:
        Dict with all enriched data
    """
    result = {
        'ticker': ticker,
        'status': 'error',
        'timestamp': datetime.now().isoformat()
    }

    # R4: Preferred share - duplicate of issuer common (e.g. CMS-PB, GAB-PG).
    # Preferred shares share the issuer's CIK and financials with the common stock,
    # so any analysis is just a mislabeled copy of the common's analysis.
    if PREFERRED_SHARE_REGEX.search(ticker):
        result['status'] = 'skipped'
        result['reason'] = 'preferred share - duplicate of issuer common'
        log_info('enrichment', 'Skipped {} (preferred share)'.format(ticker), ticker=ticker)
        return result

    # Get CIK
    cik = ticker_to_cik(ticker)
    if not cik:
        result['error'] = 'CIK not found for ticker'
        log_warning('enrichment', 'CIK not found', ticker=ticker)
        return result

    result['cik'] = cik

    # Update company info in us_stock_master (SIC, exchange, state, etc.)
    update_company_info(ticker, cik)

    # Get company info
    submissions = get_company_submissions(cik)
    if submissions.get('status') != 'ok':
        result['error'] = 'Failed to fetch company info'
        log_error('enrichment', 'Failed to fetch submissions', ticker=ticker)
        return result

    company = submissions['company']
    result['company'] = {
        'name': company.get('name'),
        'sic_code': company.get('sic'),
        'sic_description': company.get('sic_description'),
        'exchange': company.get('exchanges', [None])[0] if company.get('exchanges') else None,
        'fiscal_year_end': company.get('fiscal_year_end'),
    }

    # R1: Investment company (mutual fund, BDC, closed-end fund, ETN, leveraged ETF).
    # SEC EDGAR's own classification. They file Form N-CSR/N-CEN/N-Q, never 10-K/10-Q,
    # and our pipeline's metrics (revenue, net margin, ROE) are meaningless for
    # pooled investment vehicles.
    if company.get('entity_type') == 'investment':
        result['status'] = 'skipped'
        result['reason'] = 'investment company (files N-CSR, not 10-K)'
        log_info('enrichment', 'Skipped {} (investment company)'.format(ticker), ticker=ticker)
        return result

    # R2: No 10-K or 10-Q filings present in recent submissions.
    # Catches foreign private issuers (file 20-F/6-K), closed-end funds, and
    # other entities whose filings our us-gaap+10-K/10-Q extractor cannot read.
    filings_list = submissions.get('filings', []) or []
    has_10k_q = any(
        (f.get('form') or '').rstrip('/A') in ('10-K', '10-Q')
        for f in filings_list
    )
    if not has_10k_q:
        result['status'] = 'skipped'
        result['reason'] = 'no 10-K/10-Q filings (foreign filer or special entity)'
        log_info('enrichment', 'Skipped {} (no 10-K/10-Q)'.format(ticker), ticker=ticker)
        return result

    # Get all metrics
    metrics_result = get_all_metrics(cik)
    if metrics_result.get('status') != 'ok':
        result['error'] = 'Failed to fetch financial data'
        log_error('enrichment', 'Failed to fetch metrics', ticker=ticker)
        return result

    metrics = metrics_result.get('metrics', {})
    result['raw_metrics'] = metrics

    # Calculate ratios
    ratios = calculate_ratios(metrics)
    result['ratios'] = ratios

    # Get growth rates
    growth = get_growth_rates(cik, metrics)
    result['growth'] = growth

    # Get insider activity
    insider = get_insider_activity(cik, days=90)
    if insider.get('status') == 'ok':
        result['insider_activity'] = insider

    # Recent filings
    result['recent_filings'] = submissions.get('filings', [])[:10]

    result['status'] = 'ok'
    log_info('enrichment', 'Enriched {} successfully'.format(ticker), ticker=ticker)

    return result


def build_claude_prompt(enriched_data):
    """
    Build a Claude analysis prompt from enriched data

    Args:
        enriched_data: Result from enrich_stock()

    Returns:
        Formatted prompt string
    """
    if enriched_data.get('status') != 'ok':
        return None

    ticker = enriched_data.get('ticker', 'UNKNOWN')
    cik = enriched_data.get('cik', 'N/A')
    company = enriched_data.get('company', {})
    ratios = enriched_data.get('ratios', {})
    growth = enriched_data.get('growth', {})
    insider = enriched_data.get('insider_activity', {})

    # Format values
    def fmt_num(val, decimals=1, suffix=''):
        if val is None:
            return 'N/A'
        return '{:.{d}f}{s}'.format(val, d=decimals, s=suffix)

    def fmt_money(val):
        return _format_large_number(val)

    def fmt_growth(val):
        if val is None:
            return 'N/A'
        sign = '+' if val >= 0 else ''
        return '{}{:.1f}%'.format(sign, val)

    prompt = """Analyze this US company based on SEC EDGAR financial data.
IMPORTANT: No stock price data is available. Focus ONLY on fundamentals.

===============================================================================
COMPANY: {company_name} ({ticker})
CIK: {cik}
SECTOR: {sic_description} (SIC: {sic_code})
EXCHANGE: {exchange}
===============================================================================

INCOME STATEMENT (Latest Period)
-------------------------------------------------------------------------------
Revenue:              {revenue}    ({revenue_growth} YoY)
Gross Profit:         {gross_profit}
Operating Income:     {operating_income}
Net Income:           {net_income}    ({net_income_growth} YoY)
EPS (Diluted):        {eps}           ({eps_growth} YoY)

PROFITABILITY RATIOS
-------------------------------------------------------------------------------
Gross Margin:         {gross_margin}
Operating Margin:     {operating_margin}
Net Margin:           {net_margin}
ROE:                  {roe}
ROA:                  {roa}

BALANCE SHEET
-------------------------------------------------------------------------------
Total Assets:         {total_assets}
Total Liabilities:    {total_liabilities}
Stockholders Equity:  {equity}
Cash & Equivalents:   {cash}
Long-term Debt:       {long_term_debt}

LIQUIDITY & LEVERAGE
-------------------------------------------------------------------------------
Current Ratio:        {current_ratio}
Quick Ratio:          {quick_ratio}
Debt/Equity:          {debt_to_equity}
Interest Coverage:    {interest_coverage}
{equity_warning}
CASH FLOW
-------------------------------------------------------------------------------
Operating Cash Flow:  {operating_cf}
Capital Expenditure:  {capex}
Free Cash Flow:       {fcf}
FCF Margin:           {fcf_margin}

INSIDER ACTIVITY (Last 90 Days)
-------------------------------------------------------------------------------
Form 4 Filings:       {insider_count}

DATA QUALITY
-------------------------------------------------------------------------------
Data Freshness:       {data_as_of}
Metrics Available:    {metrics_available}

===============================================================================
INSTRUCTIONS: Analyze the fundamentals and provide your assessment.
DO NOT reference stock price, P/E ratio, or any valuation metrics.
Focus on: profitability trends, financial health, growth quality.

Respond with ONLY this JSON (no markdown, no explanation):
{{
  "rating": "STRONG_BUY" | "BUY" | "HOLD" | "SELL" | "STRONG_SELL",
  "confidence": 1-100,
  "thesis": "2-3 sentence investment thesis based on fundamentals",
  "strengths": ["strength1", "strength2", "strength3"],
  "risks": ["risk1", "risk2", "risk3"],
  "key_metrics_to_watch": ["metric1", "metric2"]
}}
""".format(
        company_name=company.get('name', 'Unknown'),
        ticker=ticker,
        cik=cik,
        sic_description=company.get('sic_description', 'N/A'),
        sic_code=company.get('sic_code', 'N/A'),
        exchange=company.get('exchange', 'N/A'),
        revenue=fmt_money(ratios.get('revenue')),
        revenue_growth=fmt_growth(growth.get('revenue_growth')),
        gross_profit=fmt_money(ratios.get('gross_profit')),
        operating_income=fmt_money(ratios.get('operating_income')),
        net_income=fmt_money(ratios.get('net_income')),
        net_income_growth=fmt_growth(growth.get('net_income_growth')),
        eps=fmt_num(ratios.get('eps_diluted'), 2),
        eps_growth=fmt_growth(growth.get('eps_growth')),
        gross_margin=fmt_num(ratios.get('gross_margin'), 1, '%'),
        operating_margin=fmt_num(ratios.get('operating_margin'), 1, '%'),
        net_margin=fmt_num(ratios.get('net_margin'), 1, '%'),
        roe=fmt_num(ratios.get('roe'), 1, '%'),
        roa=fmt_num(ratios.get('roa'), 1, '%'),
        total_assets=fmt_money(ratios.get('total_assets')),
        total_liabilities=fmt_money(ratios.get('total_liabilities')),
        equity=fmt_money(ratios.get('stockholders_equity')),
        cash=fmt_money(ratios.get('cash')),
        long_term_debt=fmt_money(ratios.get('long_term_debt')),
        current_ratio=fmt_num(ratios.get('current_ratio'), 2, 'x'),
        quick_ratio=fmt_num(ratios.get('quick_ratio'), 2, 'x'),
        debt_to_equity=fmt_num(ratios.get('debt_to_equity'), 2, 'x'),
        interest_coverage=fmt_num(ratios.get('interest_coverage'), 1, 'x'),
        equity_warning='*** WARNING: Negative stockholders equity - D/E and ROE not meaningful ***\n' if ratios.get('negative_equity') else '',
        operating_cf=fmt_money(ratios.get('operating_cash_flow')),
        capex=fmt_money(ratios.get('capex')),
        fcf=fmt_money(ratios.get('free_cash_flow')),
        fcf_margin=fmt_num(ratios.get('fcf_margin'), 1, '%'),
        insider_count=insider.get('form4_count', 'N/A'),
        data_as_of=ratios.get('data_as_of', 'N/A'),
        metrics_available=ratios.get('metrics_available', 0)
    )

    return prompt


def save_metrics_to_db(ticker, ratios, growth):
    """Save calculated metrics to database"""
    try:
        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()

        period = ratios.get('data_as_of', datetime.now().strftime('%Y-%m-%d'))

        cursor.execute('''
            INSERT OR REPLACE INTO us_calculated_metrics
            (ticker, period, gross_margin, operating_margin, net_margin,
             roe, roa, current_ratio, quick_ratio, debt_to_equity, debt_to_assets,
             revenue_growth, net_income_growth, eps_growth, eps_diluted,
             free_cash_flow, fcf_margin, data_as_of, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ''', (
            ticker, period,
            ratios.get('gross_margin'),
            ratios.get('operating_margin'),
            ratios.get('net_margin'),
            ratios.get('roe'),
            ratios.get('roa'),
            ratios.get('current_ratio'),
            ratios.get('quick_ratio'),
            ratios.get('debt_to_equity'),
            ratios.get('debt_to_assets'),
            growth.get('revenue_growth'),
            growth.get('net_income_growth'),
            growth.get('eps_growth'),
            ratios.get('eps_diluted'),
            ratios.get('free_cash_flow'),
            ratios.get('fcf_margin'),
            ratios.get('data_as_of'),
            datetime.now().isoformat()
        ))

        conn.commit()
        conn.close()
        return True

    except Exception as e:
        log_error('enrichment', 'Failed to save metrics: {}'.format(str(e)),
                  ticker=ticker, exception=e)
        return False


def save_analysis_to_db(ticker, cik, analysis, ratios, processing_time_ms=0):
    """Save Claude analysis to database"""
    try:
        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()

        cursor.execute('''
            INSERT INTO us_stock_analyses
            (ticker, cik, timestamp, rating, confidence, thesis,
             strengths, risks, key_metrics_to_watch,
             data_freshness, data_completeness, metrics_snapshot,
             model_used, processing_time_ms)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ''', (
            ticker,
            cik,
            datetime.now().isoformat(),
            analysis.get('rating'),
            analysis.get('confidence'),
            analysis.get('thesis'),
            json.dumps(analysis.get('strengths', [])),
            json.dumps(analysis.get('risks', [])),
            json.dumps(analysis.get('key_metrics_to_watch', [])),
            ratios.get('data_as_of'),
            ratios.get('data_completeness'),
            json.dumps(ratios),
            'haiku',
            processing_time_ms
        ))

        # Add to GPT processing queue
        cursor.execute('''
            INSERT OR IGNORE INTO us_gpt_processing_queue
            (ticker, cik, claude_timestamp, status)
            VALUES (?, ?, ?, 'pending')
        ''', (ticker, cik, datetime.now().isoformat()))

        conn.commit()
        conn.close()
        log_info('enrichment', 'Saved analysis for {}: {}'.format(ticker, analysis.get('rating')),
                 ticker=ticker)
        return True

    except Exception as e:
        log_error('enrichment', 'Failed to save analysis: {}'.format(str(e)),
                  ticker=ticker, exception=e)
        return False


# Testing
if __name__ == '__main__':
    print("US Data Enrichment Test")
    print("=" * 60)

    # Test with Apple
    print("\nEnriching AAPL...")
    result = enrich_stock('AAPL')

    if result['status'] == 'ok':
        print("Company: {}".format(result['company']['name']))
        print("\nRatios:")
        ratios = result['ratios']
        for key in ['gross_margin', 'operating_margin', 'net_margin', 'roe', 'roa']:
            val = ratios.get(key)
            print("  {}: {:.1f}%".format(key, val) if val else "  {}: N/A".format(key))

        print("\nGrowth:")
        growth = result['growth']
        for key in ['revenue_growth', 'net_income_growth', 'eps_growth']:
            val = growth.get(key)
            print("  {}: {:.1f}%".format(key, val) if val else "  {}: N/A".format(key))

        print("\nBuilding Claude prompt...")
        prompt = build_claude_prompt(result)
        if prompt:
            print("Prompt length: {} chars".format(len(prompt)))
            print("\n--- First 500 chars ---")
            print(prompt[:500])
    else:
        print("Error: {}".format(result.get('error')))
