"""
SEC EDGAR API Client
Fetches financial data from SEC EDGAR for US stocks
Public domain data - 100% legal for commercial use
"""

import requests
import json
import time
import sqlite3
from datetime import datetime, timedelta
from typing import Dict, Any, Optional, List

from us_error_logger import log_error, log_warning, log_info, log_api_error, log_debug

# Configuration
# All endpoints are public SEC EDGAR services; no API key is needed,
# but SEC requires a descriptive User-Agent on every request.
SEC_BASE_URL = "https://data.sec.gov"
SEC_SUBMISSIONS_URL = "https://data.sec.gov/submissions/CIK{}.json"  # expects zero-padded 10-digit CIK
SEC_FACTS_URL = "https://data.sec.gov/api/xbrl/companyfacts/CIK{}.json"  # expects zero-padded 10-digit CIK
SEC_TICKERS_URL = "https://www.sec.gov/files/company_tickers.json"
SEC_TICKERS_EXCHANGE_URL = "https://www.sec.gov/files/company_tickers_exchange.json"

USER_AGENT = "MCMarketsHost Research contact@marketshost.com"
RATE_LIMIT_DELAY = 0.15  # 150ms between requests (safe for 10 req/s limit)
REQUEST_TIMEOUT = 30  # seconds per HTTP request

# SQLite database used by save_tickers_to_db / update_company_info
DB_PATH = '/home/mcmarketshost/public_html/us_stock_analysis.db'

# XBRL Concept Mapping - Different companies use different tags.
# Each metric maps to a list of candidate us-gaap concept tags, tried in
# order by extract_metric(). NOTE: keys must be unique — a duplicate
# 'interest_expense' key previously appeared twice in this literal, and the
# second (shorter) list silently overwrote the first, dropping the
# 'InterestAndDebtExpense' and 'InterestPaidNet' tags. They are merged here.
CONCEPT_MAPPING = {
    # Revenue (includes banking/financial services tags)
    'revenue': [
        'Revenues',
        'RevenueFromContractWithCustomerExcludingAssessedTax',
        'RevenueFromContractWithCustomerIncludingAssessedTax',
        'SalesRevenueNet',
        'SalesRevenueGoodsNet',
        'SalesRevenueServicesNet',
        'TotalRevenuesAndOtherIncome',
        # Banking/Financial Services specific
        'InterestAndDividendIncomeOperating',
        'InterestIncomeExpenseNet',
        'RevenuesNetOfInterestExpense',
        'NoninterestIncome',
        'TotalNoninterestIncome',
        'FinancialServicesRevenue',
        'InvestmentBankingRevenue',
        'NetRevenuesIncludingNetInterestIncome',
    ],
    # Cost of Revenue
    'cost_of_revenue': [
        'CostOfRevenue',
        'CostOfGoodsAndServicesSold',
        'CostOfGoodsSold',
        'CostOfServices',
    ],
    # Gross Profit
    'gross_profit': [
        'GrossProfit',
    ],
    # Operating Expenses
    'operating_expenses': [
        'OperatingExpenses',
        'CostsAndExpenses',
    ],
    # Operating Income
    'operating_income': [
        'OperatingIncomeLoss',
        'IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest',
    ],
    # Net Income
    'net_income': [
        'NetIncomeLoss',
        'NetIncomeLossAvailableToCommonStockholdersBasic',
        'ProfitLoss',
        'NetIncomeLossAttributableToParent',
    ],
    # EPS
    'eps_basic': [
        'EarningsPerShareBasic',
    ],
    'eps_diluted': [
        'EarningsPerShareDiluted',
    ],
    # Total Assets
    'total_assets': [
        'Assets',
    ],
    # Current Assets
    'current_assets': [
        'AssetsCurrent',
    ],
    # Cash (balance sheet items only, not cash flow changes)
    'cash': [
        'CashAndCashEquivalentsAtCarryingValue',
        'Cash',
        'CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalents',
    ],
    # Accounts Receivable
    'accounts_receivable': [
        'AccountsReceivableNetCurrent',
        'AccountsReceivableNet',
        'ReceivablesNetCurrent',
    ],
    # Inventory
    'inventory': [
        'InventoryNet',
        'Inventories',
        'InventoryFinishedGoods',
    ],
    # Total Liabilities
    'total_liabilities': [
        'Liabilities',
    ],
    # Current Liabilities
    'current_liabilities': [
        'LiabilitiesCurrent',
    ],
    # Accounts Payable
    'accounts_payable': [
        'AccountsPayableCurrent',
        'AccountsPayable',
    ],
    # Long-term Debt
    'long_term_debt': [
        'LongTermDebt',
        'LongTermDebtNoncurrent',
        'DebtInstrumentCarryingAmount',
        'LongTermDebtAndCapitalLeaseObligations',
    ],
    # Total Debt
    'total_debt': [
        'DebtAndCapitalLeaseObligations',
        'LongTermDebtAndCapitalLeaseObligationsCurrent',
    ],
    # Stockholders Equity
    'stockholders_equity': [
        'StockholdersEquity',
        'StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest',
    ],
    # Retained Earnings
    'retained_earnings': [
        'RetainedEarningsAccumulatedDeficit',
    ],
    # Common Shares Outstanding
    'shares_outstanding': [
        'CommonStockSharesOutstanding',
        'WeightedAverageNumberOfSharesOutstandingBasic',
        'WeightedAverageNumberOfDilutedSharesOutstanding',
    ],
    # Interest Expense (merged from the two previously-duplicated entries)
    'interest_expense': [
        'InterestExpense',
        'InterestExpenseDebt',
        'InterestAndDebtExpense',
        'InterestPaidNet',
    ],
    # Operating Cash Flow
    'operating_cash_flow': [
        'NetCashProvidedByUsedInOperatingActivities',
    ],
    # Investing Cash Flow
    'investing_cash_flow': [
        'NetCashProvidedByUsedInInvestingActivities',
    ],
    # Financing Cash Flow
    'financing_cash_flow': [
        'NetCashProvidedByUsedInFinancingActivities',
    ],
    # Capital Expenditure
    'capex': [
        'PaymentsToAcquirePropertyPlantAndEquipment',
        'PaymentsToAcquireProductiveAssets',
    ],
    # Dividends
    'dividends_paid': [
        'PaymentsOfDividends',
        'PaymentsOfDividendsCommonStock',
        'DividendsCash',
    ],
    # Stock Buybacks
    'stock_buybacks': [
        'PaymentsForRepurchaseOfCommonStock',
        'PaymentsForRepurchaseOfEquity',
    ],
    # R&D Expense
    'rd_expense': [
        'ResearchAndDevelopmentExpense',
    ],
    # SG&A Expense
    'sga_expense': [
        'SellingGeneralAndAdministrativeExpense',
    ],
    # Income Tax Expense
    'income_tax': [
        'IncomeTaxExpenseBenefit',
    ],
    # Depreciation & Amortization
    'depreciation': [
        'DepreciationDepletionAndAmortization',
        'DepreciationAndAmortization',
        'Depreciation',
    ],
}

# S&P 500 tickers for priority processing (partial list - top 100 by market cap)
# Membership here sets priority=1 in get_all_tickers(); everything else gets 100.
# NOTE(review): snapshot list — market-cap ordering drifts over time; refresh periodically.
SP500_TOP = [
    'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'NVDA', 'META', 'TSLA', 'BRK-B', 'UNH', 'JNJ',
    'JPM', 'V', 'PG', 'XOM', 'HD', 'CVX', 'MA', 'ABBV', 'MRK', 'COST',
    'PEP', 'AVGO', 'KO', 'LLY', 'WMT', 'MCD', 'CSCO', 'TMO', 'ABT', 'CRM',
    'ACN', 'DHR', 'ADBE', 'NKE', 'CMCSA', 'VZ', 'NFLX', 'TXN', 'PM', 'NEE',
    'BMY', 'INTC', 'RTX', 'QCOM', 'UPS', 'HON', 'T', 'LOW', 'AMGN', 'IBM',
    'ORCL', 'GE', 'CAT', 'SPGI', 'INTU', 'DE', 'BA', 'AMD', 'GS', 'AXP',
    'SBUX', 'GILD', 'BLK', 'PLD', 'MDLZ', 'ADI', 'ISRG', 'CVS', 'TJX', 'SYK',
    'BKNG', 'MMC', 'ADP', 'VRTX', 'REGN', 'LMT', 'CI', 'TMUS', 'ZTS', 'CB',
    'ETN', 'NOW', 'MO', 'SO', 'SCHW', 'EOG', 'PNC', 'BDX', 'DUK', 'SLB',
    'C', 'BSX', 'LRCX', 'ITW', 'MU', 'AON', 'WM', 'CL', 'EQIX', 'ICE',
]

# Rate limiting state: wall-clock timestamp of the last completed request
_last_request_time = 0


def _rate_limited_request(url, headers=None):
    """Make a rate-limited HTTP GET request to SEC and parse the JSON body.

    Args:
        url: Fully-qualified SEC EDGAR URL.
        headers: Optional extra headers. A 'User-Agent' is always set (SEC
                 rejects anonymous clients). The caller's dict is NOT
                 mutated (the original code wrote into it).

    Returns:
        Dict with 'status' in {'ok', 'not_found', 'rate_limited', 'timeout',
        'error'}; on 'ok' includes 'data' (parsed JSON), otherwise 'error'.
    """
    global _last_request_time

    # Enforce minimum spacing between requests (SEC allows ~10 req/s)
    elapsed = time.time() - _last_request_time
    if elapsed < RATE_LIMIT_DELAY:
        time.sleep(RATE_LIMIT_DELAY - elapsed)

    # Copy rather than mutate the caller-supplied dict
    request_headers = dict(headers) if headers else {}
    request_headers['User-Agent'] = USER_AGENT

    try:
        response = requests.get(url, headers=request_headers, timeout=REQUEST_TIMEOUT)
        _last_request_time = time.time()

        if response.status_code == 200:
            return {'status': 'ok', 'data': response.json()}
        elif response.status_code == 404:
            return {'status': 'not_found', 'error': 'Resource not found'}
        elif response.status_code == 429:
            log_api_error('scraper_sec', url, 429, 'Rate limited by SEC')
            return {'status': 'rate_limited', 'error': 'Rate limited'}
        else:
            log_api_error('scraper_sec', url, response.status_code, 'HTTP error')
            return {'status': 'error', 'error': 'HTTP {}'.format(response.status_code)}

    except requests.exceptions.Timeout:
        log_api_error('scraper_sec', url, 0, 'Request timeout')
        return {'status': 'timeout', 'error': 'Request timeout'}
    # JSON errors must be handled BEFORE RequestException: since requests
    # 2.27, response.json() raises requests.exceptions.JSONDecodeError,
    # which subclasses BOTH json.JSONDecodeError and RequestException —
    # with the original ordering this branch was unreachable.
    except json.JSONDecodeError as e:
        log_error('scraper_sec', 'JSON decode error: {}'.format(str(e)), error_type='ParseError')
        return {'status': 'error', 'error': 'Invalid JSON response'}
    except requests.exceptions.RequestException as e:
        log_error('scraper_sec', str(e), error_type='RequestError')
        return {'status': 'error', 'error': str(e)}

def get_all_tickers():
    """Fetch the complete SEC ticker-to-CIK mapping.

    Returns:
        On success: {'status': 'ok', 'tickers': {TICKER: {cik, company_name,
        priority}}, 'count': N}. On failure: the underlying error result
        from _rate_limited_request. CIKs are zero-padded to 10 digits;
        priority is 1 for SP500_TOP members, 100 otherwise.
    """
    response = _rate_limited_request(SEC_TICKERS_URL)
    if response['status'] != 'ok':
        return response

    ticker_map = {}
    for entry in response['data'].values():
        symbol = entry.get('ticker', '')
        padded_cik = str(entry.get('cik_str', '')).zfill(10)
        if not (symbol and padded_cik):
            continue
        ticker_map[symbol] = {
            'cik': padded_cik,
            'company_name': entry.get('title', ''),
            'priority': 1 if symbol in SP500_TOP else 100,
        }

    log_info('scraper_sec', 'Fetched {} tickers from SEC'.format(len(ticker_map)))
    return {'status': 'ok', 'tickers': ticker_map, 'count': len(ticker_map)}


def get_company_submissions(cik):
    """Get company profile and recent filings list from SEC submissions feed.

    Args:
        cik: Company CIK (any int/str form; zero-padded internally).

    Returns:
        {'status': 'ok', 'company': {...}, 'filings': [up to 100 recent
        filings]} on success, or the underlying error result.
    """
    result = _rate_limited_request(SEC_SUBMISSIONS_URL.format(str(cik).zfill(10)))
    if result['status'] != 'ok':
        return result

    data = result['data']

    company_info = {
        'cik': data.get('cik'),
        'name': data.get('name'),
        'tickers': data.get('tickers', []),
        'exchanges': data.get('exchanges', []),
        'sic': data.get('sic'),
        'sic_description': data.get('sicDescription'),
        'fiscal_year_end': data.get('fiscalYearEnd'),
        'entity_type': data.get('entityType'),
        'category': data.get('category'),
        'state_of_incorporation': data.get('stateOfIncorporation'),
        'ein': data.get('ein'),
        'website': data.get('website'),
    }

    # SEC ships filings as parallel arrays; they may be ragged, so index
    # defensively and pad missing positions with None.
    recent = data.get('filings', {}).get('recent', {})
    forms = recent.get('form', [])
    dates = recent.get('filingDate', [])
    accessions = recent.get('accessionNumber', [])
    primary_docs = recent.get('primaryDocument', [])

    def _at(seq, idx):
        # Safe positional access into a possibly-short parallel array
        return seq[idx] if idx < len(seq) else None

    recent_filings = [
        {
            'form': _at(forms, i),
            'filing_date': _at(dates, i),
            'accession_number': _at(accessions, i),
            'primary_document': _at(primary_docs, i),
        }
        for i in range(min(100, len(forms)))
    ]

    return {
        'status': 'ok',
        'company': company_info,
        'filings': recent_filings
    }


def get_company_facts(cik):
    """Fetch the XBRL company-facts document for a company.

    Args:
        cik: Company CIK (zero-padded internally to 10 digits).

    Returns:
        {'status': 'ok', 'entity_name', 'cik', 'facts'} on success, or the
        underlying error result from _rate_limited_request.
    """
    endpoint = SEC_FACTS_URL.format(str(cik).zfill(10))
    result = _rate_limited_request(endpoint)
    if result['status'] != 'ok':
        return result

    payload = result['data']
    return {
        'status': 'ok',
        'entity_name': payload.get('entityName'),
        'cik': payload.get('cik'),
        'facts': payload.get('facts', {}),
    }


def _fact_record(v, concept):
    """Normalize one raw XBRL fact entry into the candidate-record shape."""
    return {
        'value': v.get('val'),
        'end_date': v.get('end'),
        'filed': v.get('filed'),
        'form': v.get('form'),
        'fy': v.get('fy'),
        'fp': v.get('fp'),
        'frame': v.get('frame'),
        'concept': concept,
    }


def extract_metric(facts, metric_name, form_filter=None, latest_only=True):
    """
    Extract a specific metric from XBRL facts

    Args:
        facts: The 'facts' dict from get_company_facts
        metric_name: Key from CONCEPT_MAPPING
        form_filter: List of form types to filter (e.g., ['10-K', '10-Q'])
        latest_only: If True, return only the most recent value

    Returns:
        Single candidate dict if latest_only=True, else a list of up to 20
        candidate dicts (most recent first). None if the metric is unknown
        or no data is found.

    Note: Tries ALL concept tags and returns the one with the most recent data,
          not just the first matching tag (which may have outdated data).
    """
    if metric_name not in CONCEPT_MAPPING:
        return None

    us_gaap = facts.get('us-gaap', {})

    # Forms accepted when selecting the "latest" value. The original code
    # hard-coded ['10-K', '10-Q'] here, so any other form_filter (e.g.
    # ['20-F']) filtered the list first and then could never match —
    # latest_only always returned None. Honor the caller's filter instead,
    # defaulting to 10-K/10-Q when none is given.
    accepted_forms = form_filter if form_filter else ['10-K', '10-Q']

    # Collect best (most recent) value from each concept tag
    all_candidates = []

    for concept in CONCEPT_MAPPING[metric_name]:
        if concept not in us_gaap:
            continue

        units = us_gaap[concept].get('units', {})

        # Try USD first, then USD/shares, then bare share counts
        values_list = units.get('USD', units.get('USD/shares', units.get('shares', [])))
        if not values_list:
            continue

        # Filter by form type
        if form_filter:
            values_list = [v for v in values_list if v.get('form') in form_filter]
        if not values_list:
            continue

        # Sort by period end date, most recent first
        values_list = sorted(values_list, key=lambda x: x.get('end', ''), reverse=True)

        if latest_only:
            # Take only the most recent acceptable value from this concept
            for v in values_list:
                if v.get('form') in accepted_forms:
                    all_candidates.append(_fact_record(v, concept))
                    break
        else:
            # For historical data, collect up to 20 values from this concept
            for v in values_list[:20]:
                all_candidates.append(_fact_record(v, concept))

    if not all_candidates:
        return None

    if latest_only:
        # Across concepts, prefer the candidate with the most recent end_date
        return max(all_candidates, key=lambda x: x.get('end_date', ''))

    # Merge, re-sort by end_date and cap at 20
    all_candidates = sorted(all_candidates, key=lambda x: x.get('end_date', ''), reverse=True)
    return all_candidates[:20]


def get_all_metrics(cik):
    """Fetch and extract every mapped financial metric for a company.

    Args:
        cik: Company CIK.

    Returns:
        {'status': 'ok', 'entity_name', 'cik', 'metrics', 'metrics_count',
        'total_possible'} on success; metrics holds only the names that
        yielded data. On failure, the error result from get_company_facts.
    """
    facts_result = get_company_facts(cik)
    if facts_result['status'] != 'ok':
        return facts_result

    source_facts = facts_result['facts']

    # Keep only metrics that actually produced a value
    extracted = {}
    for name in CONCEPT_MAPPING:
        value = extract_metric(source_facts, name, form_filter=['10-K', '10-Q'], latest_only=True)
        if value:
            extracted[name] = value

    return {
        'status': 'ok',
        'entity_name': facts_result['entity_name'],
        'cik': facts_result['cik'],
        'metrics': extracted,
        'metrics_count': len(extracted),
        'total_possible': len(CONCEPT_MAPPING),
    }


def get_historical_metrics(cik, metric_name, periods=8, annual_only=False):
    """Get historical values for a specific metric.

    Args:
        cik: Company CIK
        metric_name: Key from CONCEPT_MAPPING
        periods: Number of periods to return
        annual_only: If True, only return full-year data for proper YoY
                     comparison. Annual XBRL frames look like 'CY2024';
                     quarterly ones like 'CY2024Q3'.

    Returns:
        {'status': 'ok', 'metric', 'values'} or an error/not_found result.
    """
    facts_result = get_company_facts(cik)
    if facts_result['status'] != 'ok':
        return facts_result

    history = extract_metric(
        facts_result['facts'], metric_name,
        form_filter=['10-K', '10-Q'], latest_only=False,
    )
    if not history:
        return {'status': 'not_found', 'error': 'Metric not found'}

    if annual_only:
        import re
        # 'CY' followed by exactly four digits and nothing else => annual frame
        is_annual = re.compile(r'^CY\d{4}$')
        history = sorted(
            (v for v in history if v.get('frame') and is_annual.match(v.get('frame', ''))),
            key=lambda x: x.get('frame', ''),
            reverse=True,  # most recent year first
        )

    return {
        'status': 'ok',
        'metric': metric_name,
        'values': history[:periods],
    }


def get_insider_activity(cik, days=90):
    """Count Form 4 (insider trading) filings within the last `days` days.

    Args:
        cik: Company CIK.
        days: Lookback window in days.

    Returns:
        {'status': 'ok', 'form4_count', 'period_days'} on success, or the
        error result from get_company_submissions. A buy/sell split would
        require parsing each filing document, so only the count is reported.
    """
    submissions = get_company_submissions(cik)
    if submissions['status'] != 'ok':
        return submissions

    cutoff = (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d')

    form4_count = 0
    for filing in submissions['filings']:
        # Filing fields may be None (ragged parallel arrays are padded with
        # None upstream); comparing None >= str raises TypeError, so guard
        # the date before comparing. ISO dates compare correctly as strings.
        filing_date = filing.get('filing_date')
        if filing.get('form') == '4' and filing_date and filing_date >= cutoff:
            form4_count += 1

    return {
        'status': 'ok',
        'form4_count': form4_count,
        'period_days': days
    }


def ticker_to_cik(ticker):
    """Resolve a ticker symbol (case-insensitive) to its zero-padded SEC CIK.

    Returns None when the lookup fails or the ticker is unknown.
    """
    lookup = get_all_tickers()
    if lookup['status'] != 'ok':
        return None

    entry = lookup.get('tickers', {}).get(ticker.upper())
    return entry['cik'] if entry else None


def save_tickers_to_db(tickers_data):
    """Upsert ticker records into us_stock_master.

    Args:
        tickers_data: Dict of ticker -> {cik, company_name, priority}
                      (the shape produced by get_all_tickers).

    Returns:
        Number of rows written, or 0 on failure (error is logged).

    The COALESCE subquery preserves each row's original created_at when an
    existing ticker is replaced.
    """
    conn = None
    try:
        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()

        now = datetime.now().isoformat()
        count = 0
        for ticker, info in tickers_data.items():
            cursor.execute('''
                INSERT OR REPLACE INTO us_stock_master
                (ticker, cik, company_name, priority, is_active, created_at, updated_at)
                VALUES (?, ?, ?, ?, 1, COALESCE(
                    (SELECT created_at FROM us_stock_master WHERE ticker = ?),
                    ?
                ), ?)
            ''', (
                ticker,
                info['cik'],
                info['company_name'],
                info['priority'],
                ticker,
                now,
                now
            ))
            count += 1

        conn.commit()
        log_info('scraper_sec', 'Saved {} tickers to database'.format(count))
        return count

    except Exception as e:
        log_error('scraper_sec', 'Failed to save tickers: {}'.format(str(e)), exception=e)
        return 0
    finally:
        # Close in all paths — the original leaked the connection whenever
        # connect/execute/commit raised.
        if conn is not None:
            conn.close()


def update_company_info(ticker, cik):
    """Refresh a ticker's company-profile columns from SEC submissions data.

    Args:
        ticker: Ticker symbol identifying the us_stock_master row to update.
        cik: Company CIK used to fetch the submissions feed.

    Returns:
        {'status': 'ok', 'company': {...}} on success, or an error dict.
    """
    submissions = get_company_submissions(cik)
    if submissions['status'] != 'ok':
        return submissions

    company = submissions['company']
    # First listed exchange, if any (submissions feed may return [])
    exchanges = company.get('exchanges') or []

    conn = None
    try:
        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()

        cursor.execute('''
            UPDATE us_stock_master SET
                sic_code = ?,
                sic_description = ?,
                exchange = ?,
                fiscal_year_end = ?,
                entity_type = ?,
                category = ?,
                state_of_incorporation = ?,
                ein = ?,
                website = ?,
                updated_at = ?
            WHERE ticker = ?
        ''', (
            company.get('sic'),
            company.get('sic_description'),
            exchanges[0] if exchanges else None,
            company.get('fiscal_year_end'),
            company.get('entity_type'),
            company.get('category'),
            company.get('state_of_incorporation'),
            company.get('ein'),
            company.get('website'),
            datetime.now().isoformat(),
            ticker
        ))

        conn.commit()
        return {'status': 'ok', 'company': company}

    except Exception as e:
        log_error('scraper_sec', 'Failed to update company info: {}'.format(str(e)),
                  ticker=ticker, exception=e)
        return {'status': 'error', 'error': str(e)}
    finally:
        # Close in all paths — the original leaked the connection on error.
        if conn is not None:
            conn.close()


# Testing — manual smoke test; hits the live SEC API when run directly.
if __name__ == '__main__':
    print("SEC EDGAR API Client Test")
    print("=" * 60)

    # Test 1: Get all tickers (exercises the tickers endpoint + parsing)
    print("\n1. Fetching all tickers...")
    result = get_all_tickers()
    print("   Status: {}".format(result['status']))
    if result['status'] == 'ok':
        print("   Total tickers: {}".format(result['count']))

    # Test 2: Get Apple data (ticker->CIK resolution, then full metrics pull)
    print("\n2. Fetching Apple (AAPL) data...")
    cik = ticker_to_cik('AAPL')
    print("   CIK: {}".format(cik))

    if cik:
        metrics = get_all_metrics(cik)
        print("   Status: {}".format(metrics['status']))
        if metrics['status'] == 'ok':
            print("   Metrics found: {}/{}".format(
                metrics['metrics_count'], metrics['total_possible']))
            # Show the first five extracted metrics.
            # NOTE(review): '{:,.0f}' assumes data['value'] is numeric — a
            # None value here would raise; acceptable for a smoke test.
            for name, data in list(metrics['metrics'].items())[:5]:
                print("   - {}: {:,.0f} ({})".format(
                    name, data['value'], data['end_date']))