#!/usr/bin/env python3
"""
US Stock GPT Analysis Cron Job
Processes one stock every 5 minutes from the queue
Uses stored Claude analysis data (metrics_snapshot) - NO SEC API calls
Runs via cron: 2-57/5 * * * *

Log file: /tmp/us_gpt_cron.log
Lock file: /tmp/us_gpt_cron.lock
"""

import os
import sys
import json
import sqlite3
import subprocess
import time
import fcntl
from datetime import datetime, timedelta

# Configuration
DB_PATH = '/home/mcmarketshost/public_html/us_stock_analysis.db'
LOCK_FILE = '/tmp/us_gpt_cron.lock'
LOG_FILE = '/tmp/us_gpt_cron.log'
CODEX_PATH = '/bin/codex'
TEMP_OUTPUT = '/tmp/codex_gpt_output.txt'

# Retry configuration
MAX_RETRIES = 3
RETRY_DELAY_HOURS = 1


def log_print(message):
    """Print *message* to stdout and append a timestamped copy to LOG_FILE.

    Logging failures (unwritable /tmp, disk full) are deliberately ignored
    so a broken log file can never abort the cron run — stdout already has
    the line.
    """
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    log_line = '{} | {}'.format(timestamp, message)
    print(log_line)
    sys.stdout.flush()
    try:
        with open(LOG_FILE, 'a') as f:
            f.write(log_line + '\n')
    except OSError:
        # Best-effort file logging only; never raise from here.
        pass


def get_db_conn():
    """Open a SQLite connection to DB_PATH with dict-style row access."""
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection


def acquire_lock():
    """Acquire an exclusive non-blocking lock to prevent concurrent runs.

    Returns the open lock file handle on success (caller must keep it open
    for the lock's lifetime), or None when another instance holds the lock
    or the lock file cannot be opened.
    """
    lock_file = None
    try:
        lock_file = open(LOCK_FILE, 'w')
        fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        lock_file.write(str(os.getpid()))
        lock_file.flush()
        return lock_file
    except OSError:
        # Lock is held elsewhere (or open failed): close our handle so we
        # don't leak a file descriptor on every blocked cron tick.
        if lock_file is not None:
            lock_file.close()
        return None


def release_lock(lock_file):
    """Release the flock and remove LOCK_FILE; no-op when lock_file is None.

    Errors are ignored: the lock file may already be gone, and a failed
    cleanup must not mask the run's real outcome.
    """
    if lock_file:
        try:
            fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
            lock_file.close()
            os.remove(LOCK_FILE)
        except OSError:
            pass


def get_next_stock():
    """Return the oldest eligible pending entry in the GPT queue, or None.

    Eligible means status 'pending' and no retry delay still in effect
    (next_run_after NULL or in the past).
    """
    conn = get_db_conn()
    cursor = conn.cursor()

    now = datetime.now().isoformat()

    cursor.execute('''
        SELECT ticker, cik, claude_timestamp, attempt_count
        FROM us_gpt_processing_queue
        WHERE status = 'pending'
          AND (next_run_after IS NULL OR next_run_after <= ?)
        ORDER BY claude_timestamp ASC
        LIMIT 1
    ''', (now,))

    entry = cursor.fetchone()
    conn.close()

    if entry is None:
        return None
    return {
        field: entry[field]
        for field in ('ticker', 'cik', 'claude_timestamp', 'attempt_count')
    }


def get_stored_data(ticker):
    """Load the newest Claude analysis (with metrics snapshot) for *ticker*.

    Returns a dict of company info plus the parsed 'metrics' payload, or
    None when no snapshot exists or the stored JSON is invalid.
    """
    conn = get_db_conn()
    cursor = conn.cursor()

    # Most recent Claude analysis, joined with master data for company info.
    cursor.execute('''
        SELECT a.ticker, a.cik, a.metrics_snapshot, a.timestamp,
               m.company_name, m.sic_description, m.sic_code, m.exchange
        FROM us_stock_analyses a
        LEFT JOIN us_stock_master m ON a.ticker = m.ticker
        WHERE a.ticker = ?
        ORDER BY a.timestamp DESC
        LIMIT 1
    ''', (ticker,))

    record = cursor.fetchone()
    conn.close()

    if record is None or not record['metrics_snapshot']:
        return None

    try:
        parsed_metrics = json.loads(record['metrics_snapshot'])
    except json.JSONDecodeError:
        return None

    result = {
        field: record[field]
        for field in ('ticker', 'cik', 'company_name', 'sic_description',
                      'sic_code', 'exchange', 'timestamp')
    }
    result['metrics'] = parsed_metrics
    return result


def get_growth_data(ticker):
    """Fetch the latest YoY growth figures for *ticker*.

    Returns a dict with revenue/net-income/EPS growth, or an empty dict
    when no calculated metrics exist.
    """
    conn = get_db_conn()
    cursor = conn.cursor()

    cursor.execute('''
        SELECT revenue_growth, net_income_growth, eps_growth
        FROM us_calculated_metrics
        WHERE ticker = ?
        ORDER BY created_at DESC
        LIMIT 1
    ''', (ticker,))

    record = cursor.fetchone()
    conn.close()

    if record is None:
        return {}
    return {
        column: record[column]
        for column in ('revenue_growth', 'net_income_growth', 'eps_growth')
    }


def format_large_number(val):
    """Format a numeric value as a dollar amount with T/B/M/K suffix.

    Returns 'N/A' for None or anything not convertible to float. Thresholds
    compare on magnitude so negative values format symmetrically
    (e.g. -2e9 -> '$-2.00B').
    """
    if val is None:
        return 'N/A'
    try:
        val = float(val)
    except (TypeError, ValueError):
        return 'N/A'
    for threshold, suffix in ((1e12, 'T'), (1e9, 'B'), (1e6, 'M'), (1e3, 'K')):
        if abs(val) >= threshold:
            return '${:.2f}{}'.format(val / threshold, suffix)
    return '${:.2f}'.format(val)


def build_gpt_prompt(data, growth):
    """Build the GPT analysis prompt from stored Claude data.

    data: dict from get_stored_data() — company info plus a 'metrics' dict.
    growth: dict from get_growth_data() — YoY growth percentages (may be {}).
    Returns the fully formatted prompt string.
    """
    metrics = data.get('metrics', {})

    def fmt_num(val, decimals=1, suffix=''):
        # Fixed-decimal number with an optional unit suffix; 'N/A' fallback.
        if val is None:
            return 'N/A'
        try:
            return '{:.{d}f}{s}'.format(float(val), d=decimals, s=suffix)
        except (TypeError, ValueError):
            return 'N/A'

    def fmt_growth(val):
        # Growth percentage with an explicit leading '+' for non-negatives.
        if val is None:
            return 'N/A'
        try:
            sign = '+' if float(val) >= 0 else ''
            return '{}{:.1f}%'.format(sign, float(val))
        except (TypeError, ValueError):
            return 'N/A'

    prompt = """Analyze this US company based on SEC EDGAR financial data.
IMPORTANT: No stock price data is available. Focus ONLY on fundamentals.

===============================================================================
COMPANY: {company_name} ({ticker})
CIK: {cik}
SECTOR: {sic_description} (SIC: {sic_code})
EXCHANGE: {exchange}
===============================================================================

INCOME STATEMENT (Latest Period)
-------------------------------------------------------------------------------
Revenue:              {revenue}    ({revenue_growth} YoY)
Gross Profit:         {gross_profit}
Operating Income:     {operating_income}
Net Income:           {net_income}    ({net_income_growth} YoY)
EPS (Diluted):        {eps}           ({eps_growth} YoY)

PROFITABILITY RATIOS
-------------------------------------------------------------------------------
Gross Margin:         {gross_margin}
Operating Margin:     {operating_margin}
Net Margin:           {net_margin}
ROE:                  {roe}
ROA:                  {roa}

BALANCE SHEET
-------------------------------------------------------------------------------
Total Assets:         {total_assets}
Total Liabilities:    {total_liabilities}
Stockholders Equity:  {equity}
Cash & Equivalents:   {cash}
Long-term Debt:       {long_term_debt}

LIQUIDITY & LEVERAGE
-------------------------------------------------------------------------------
Current Ratio:        {current_ratio}
Quick Ratio:          {quick_ratio}
Debt/Equity:          {debt_to_equity}
Interest Coverage:    {interest_coverage}
{equity_warning}
CASH FLOW
-------------------------------------------------------------------------------
Operating Cash Flow:  {operating_cf}
Capital Expenditure:  {capex}
Free Cash Flow:       {fcf}
FCF Margin:           {fcf_margin}

DATA QUALITY
-------------------------------------------------------------------------------
Data Freshness:       {data_as_of}
Metrics Available:    {metrics_available}

===============================================================================
INSTRUCTIONS: Analyze the fundamentals and provide your assessment.
DO NOT reference stock price, P/E ratio, or any valuation metrics.
Focus on: profitability trends, financial health, growth quality.

Respond with ONLY this JSON (no markdown, no explanation):
{{
  "rating": "STRONG_BUY" | "BUY" | "HOLD" | "SELL" | "STRONG_SELL",
  "confidence": 1-100,
  "thesis": "2-3 sentence investment thesis based on fundamentals",
  "strengths": ["strength1", "strength2", "strength3"],
  "risks": ["risk1", "risk2", "risk3"],
  "key_metrics_to_watch": ["metric1", "metric2"]
}}
""".format(
        company_name=data.get('company_name', 'Unknown'),
        ticker=data.get('ticker', ''),
        cik=data.get('cik', 'N/A'),
        sic_description=data.get('sic_description', 'N/A'),
        sic_code=data.get('sic_code', 'N/A'),
        exchange=data.get('exchange', 'N/A'),
        revenue=format_large_number(metrics.get('revenue')),
        revenue_growth=fmt_growth(growth.get('revenue_growth')),
        gross_profit=format_large_number(metrics.get('gross_profit')),
        operating_income=format_large_number(metrics.get('operating_income')),
        net_income=format_large_number(metrics.get('net_income')),
        net_income_growth=fmt_growth(growth.get('net_income_growth')),
        eps=fmt_num(metrics.get('eps_diluted'), 2),
        eps_growth=fmt_growth(growth.get('eps_growth')),
        gross_margin=fmt_num(metrics.get('gross_margin'), 1, '%'),
        operating_margin=fmt_num(metrics.get('operating_margin'), 1, '%'),
        net_margin=fmt_num(metrics.get('net_margin'), 1, '%'),
        roe=fmt_num(metrics.get('roe'), 1, '%'),
        roa=fmt_num(metrics.get('roa'), 1, '%'),
        total_assets=format_large_number(metrics.get('total_assets')),
        total_liabilities=format_large_number(metrics.get('total_liabilities')),
        equity=format_large_number(metrics.get('stockholders_equity')),
        cash=format_large_number(metrics.get('cash')),
        long_term_debt=format_large_number(metrics.get('long_term_debt')),
        current_ratio=fmt_num(metrics.get('current_ratio'), 2, 'x'),
        quick_ratio=fmt_num(metrics.get('quick_ratio'), 2, 'x'),
        debt_to_equity=fmt_num(metrics.get('debt_to_equity'), 2, 'x'),
        interest_coverage=fmt_num(metrics.get('interest_coverage'), 1, 'x'),
        equity_warning='*** WARNING: Negative stockholders equity - D/E and ROE not meaningful ***\n' if metrics.get('negative_equity') else '',
        operating_cf=format_large_number(metrics.get('operating_cash_flow')),
        capex=format_large_number(metrics.get('capex')),
        fcf=format_large_number(metrics.get('free_cash_flow')),
        fcf_margin=fmt_num(metrics.get('fcf_margin'), 1, '%'),
        data_as_of=metrics.get('data_as_of', 'N/A'),
        metrics_available=metrics.get('metrics_available', 0)
    )

    return prompt


def call_codex(prompt, timeout=180):
    """Call OpenAI Codex CLI for analysis (uses ChatGPT Plus subscription)"""
    import re
    import tempfile

    # Write prompt to temp file to avoid shell escaping issues
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
        f.write(prompt)
        prompt_file = f.name

    # Codex exec command with output to file
    cmd = [
        CODEX_PATH, 'exec',
        '--skip-git-repo-check',
        '--output-last-message', TEMP_OUTPUT
    ]

    env = os.environ.copy()

    try:
        # Read prompt from stdin
        with open(prompt_file, 'r') as pf:
            result = subprocess.run(
                cmd,
                stdin=pf,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                universal_newlines=True,
                timeout=timeout,
                cwd=os.path.expanduser("~"),
                env=env
            )

        # Clean up prompt file
        os.unlink(prompt_file)

        if result.returncode != 0:
            error_msg = result.stderr.strip() or 'Unknown error'
            return {'status': 'error', 'error': error_msg}

        # Read response from output file
        response_text = ''
        if os.path.exists(TEMP_OUTPUT):
            with open(TEMP_OUTPUT, 'r') as f:
                response_text = f.read().strip()
            os.unlink(TEMP_OUTPUT)

        if not response_text:
            # Try stdout as fallback
            response_text = result.stdout.strip()

        # Parse JSON from response
        # Try to extract JSON from markdown code blocks
        json_block_pattern = r'```(?:json)?\s*(\{[\s\S]*?\})\s*```'
        matches = re.findall(json_block_pattern, response_text)
        for match in matches:
            try:
                data = json.loads(match)
                data['status'] = 'ok'
                return data
            except json.JSONDecodeError:
                continue

        # Try to find JSON object with rating field
        json_pattern = r'\{[^{}]*"rating"\s*:\s*"[^"]+?"[^{}]*\}'
        matches = re.findall(json_pattern, response_text, re.DOTALL)
        for match in matches:
            try:
                data = json.loads(match)
                data['status'] = 'ok'
                return data
            except json.JSONDecodeError:
                continue

        # Try full response as JSON
        try:
            data = json.loads(response_text)
            data['status'] = 'ok'
            return data
        except json.JSONDecodeError:
            pass

        # Try to find any valid JSON object
        brace_pattern = r'\{[^{}]+\}'
        matches = re.findall(brace_pattern, response_text)
        for match in matches:
            try:
                data = json.loads(match)
                if 'rating' in data:
                    data['status'] = 'ok'
                    return data
            except json.JSONDecodeError:
                continue

        return {'status': 'error', 'error': 'Could not parse JSON response', 'raw': response_text[:500]}

    except subprocess.TimeoutExpired:
        if os.path.exists(prompt_file):
            os.unlink(prompt_file)
        return {'status': 'error', 'error': 'Timeout'}
    except Exception as e:
        if os.path.exists(prompt_file):
            os.unlink(prompt_file)
        return {'status': 'error', 'error': str(e)}


def mark_processing(ticker):
    """Flag *ticker* as 'processing' and record the attempt timestamp."""
    attempt_time = datetime.now().isoformat()
    conn = get_db_conn()
    conn.cursor().execute('''
        UPDATE us_gpt_processing_queue
        SET status = 'processing', last_attempt = ?
        WHERE ticker = ?
    ''', (attempt_time, ticker))
    conn.commit()
    conn.close()


def mark_completed(ticker):
    """Mark *ticker* as done, bump its attempt count, and clear any error."""
    conn = get_db_conn()
    conn.cursor().execute('''
        UPDATE us_gpt_processing_queue
        SET status = 'completed',
            attempt_count = attempt_count + 1,
            last_error = NULL
        WHERE ticker = ?
    ''', (ticker,))
    conn.commit()
    conn.close()


def mark_failed(ticker, error_message, attempt_count):
    """Record a failure for *ticker*.

    Re-queues it with a delayed next_run_after while retries remain;
    otherwise marks it permanently 'failed'.
    """
    attempts = attempt_count + 1
    conn = get_db_conn()
    cursor = conn.cursor()

    if attempts < MAX_RETRIES:
        # Retries remain: back to pending, eligible again after the delay.
        retry_at = (datetime.now() + timedelta(hours=RETRY_DELAY_HOURS)).isoformat()
        cursor.execute('''
            UPDATE us_gpt_processing_queue
            SET status = 'pending',
                attempt_count = ?,
                last_error = ?,
                next_run_after = ?
            WHERE ticker = ?
        ''', (attempts, error_message, retry_at, ticker))
    else:
        # Out of retries: give up permanently.
        cursor.execute('''
            UPDATE us_gpt_processing_queue
            SET status = 'failed',
                attempt_count = ?,
                last_error = ?
            WHERE ticker = ?
        ''', (attempts, error_message, ticker))

    conn.commit()
    conn.close()


def save_gpt_analysis(ticker, cik, analysis, metrics, processing_time_ms):
    """Persist one GPT analysis row, replacing any prior row for the ticker."""
    thesis = analysis.get('thesis')
    values = (
        ticker,
        cik,
        analysis.get('rating'),
        analysis.get('confidence'),
        thesis,
        json.dumps(analysis.get('strengths', [])),
        json.dumps(analysis.get('risks', [])),
        thesis,  # the thesis doubles as the outlook column
        'MODERATE',  # default risk level
        json.dumps(metrics),
        processing_time_ms,
        datetime.now().isoformat(),
        json.dumps(analysis.get('key_metrics_to_watch', []))
    )

    conn = get_db_conn()
    conn.cursor().execute('''
        INSERT OR REPLACE INTO us_gpt_analyses
        (ticker, cik, rating, confidence, summary, positives, negatives,
         outlook, risk_level, metrics_snapshot, processing_time_ms, timestamp,
         key_metrics_to_watch)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ''', values)
    conn.commit()
    conn.close()


def update_combined_scores(ticker):
    """Blend the latest Claude and GPT ratings into us_combined_scores.

    Does nothing unless both analyses exist. Ratings are mapped to a 1-5
    scale, averaged equally, and mapped back; the gap between them sets
    the agreement level.
    """
    conn = get_db_conn()
    cursor = conn.cursor()

    # Latest Claude analysis.
    cursor.execute('''
        SELECT rating, confidence
        FROM us_stock_analyses
        WHERE ticker = ?
        ORDER BY timestamp DESC
        LIMIT 1
    ''', (ticker,))
    claude = cursor.fetchone()

    # Latest GPT analysis.
    cursor.execute('''
        SELECT rating, confidence
        FROM us_gpt_analyses
        WHERE ticker = ?
        ORDER BY timestamp DESC
        LIMIT 1
    ''', (ticker,))
    gpt = cursor.fetchone()

    if claude is None or gpt is None:
        conn.close()
        return

    # Numeric scale for averaging ratings, and its inverse for mapping back.
    scale = {
        'STRONG_BUY': 5,
        'BUY': 4,
        'HOLD': 3,
        'SELL': 2,
        'STRONG_SELL': 1
    }
    inverse_scale = {number: name for name, number in scale.items()}

    claude_score = scale.get(claude['rating'], 3)
    gpt_score = scale.get(gpt['rating'], 3)

    # Equal-weight average of the two model ratings.
    combined_rating = inverse_scale.get(round((claude_score + gpt_score) / 2), 'HOLD')

    # Average confidence, defaulting missing values to 50.
    claude_conf = claude['confidence'] or 50
    gpt_conf = gpt['confidence'] or 50
    combined_conf = round((claude_conf + gpt_conf) / 2)

    # Agreement level from the rating gap (0..4).
    gap = abs(claude_score - gpt_score)
    if gap >= 3:
        agreement = 'DISAGREEMENT'
    elif gap == 2:
        agreement = 'NEUTRAL'
    elif gap == 1:
        agreement = 'AGREEMENT'
    else:
        agreement = 'STRONG_AGREEMENT'

    cursor.execute('''
        INSERT OR REPLACE INTO us_combined_scores
        (ticker, claude_rating, claude_confidence, gpt_rating, gpt_confidence,
         combined_rating, combined_confidence, agreement_level, last_updated)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    ''', (
        ticker,
        claude['rating'],
        claude_conf,
        gpt['rating'],
        gpt_conf,
        combined_rating,
        combined_conf,
        agreement,
        datetime.now().isoformat()
    ))

    conn.commit()
    conn.close()


def process_stock(ticker, cik, attempt_count):
    """Run the full GPT analysis pipeline for one queued stock.

    Loads stored Claude metrics, builds the prompt, calls codex, validates
    and saves the result, then updates combined scores. Returns True on
    success; on any failure records the error via mark_failed and returns
    False.
    """
    started = time.time()

    log_print('Processing {} (attempt: {})'.format(ticker, attempt_count + 1))

    def fail(message):
        # Shared failure path: log, record for retry scheduling, report False.
        log_print('  ERROR: {}'.format(message))
        mark_failed(ticker, message, attempt_count)
        return False

    try:
        mark_processing(ticker)

        log_print('  Loading stored data...')
        data = get_stored_data(ticker)
        if not data:
            return fail('No stored metrics_snapshot found')

        growth = get_growth_data(ticker)

        log_print('  Building GPT prompt...')
        prompt = build_gpt_prompt(data, growth)
        if not prompt:
            return fail('Failed to build prompt')

        log_print('  Calling ChatGPT (Codex CLI)...')
        analysis = call_codex(prompt)
        if analysis.get('status') != 'ok':
            return fail(analysis.get('error', 'GPT analysis failed'))

        if 'rating' not in analysis:
            return fail('Invalid response: missing rating')

        elapsed_ms = int((time.time() - started) * 1000)

        log_print('  Saving GPT analysis...')
        save_gpt_analysis(ticker, cik, analysis, data.get('metrics', {}), elapsed_ms)

        log_print('  Updating combined scores...')
        update_combined_scores(ticker)

        mark_completed(ticker)

        log_print('  SUCCESS: {} | Confidence: {}% | Time: {:.1f}s'.format(
            analysis.get('rating'),
            analysis.get('confidence'),
            elapsed_ms / 1000
        ))
        return True

    except Exception as e:
        log_print('  EXCEPTION: {}'.format(str(e)))
        mark_failed(ticker, str(e), attempt_count)
        return False


def get_queue_stats():
    """Return a mapping of queue status -> row count for the GPT queue."""
    conn = get_db_conn()
    cursor = conn.cursor()

    cursor.execute('''
        SELECT status, COUNT(*) as count
        FROM us_gpt_processing_queue
        GROUP BY status
    ''')
    counts = {}
    for record in cursor.fetchall():
        counts[record['status']] = record['count']
    conn.close()
    return counts


def main():
    """Cron entry point: process at most one queued stock per invocation."""
    log_print('=' * 60)
    log_print('US Stock GPT Analysis Cron Job Started')
    log_print('=' * 60)

    # Single-instance guard: refuse to run alongside another GPT cron.
    lock_handle = acquire_lock()
    if not lock_handle:
        log_print('ERROR: Another GPT cron instance is already running')
        sys.exit(1)

    try:
        before = get_queue_stats()
        log_print('GPT Queue: pending={}, processing={}, completed={}, failed={}'.format(
            before.get('pending', 0),
            before.get('processing', 0),
            before.get('completed', 0),
            before.get('failed', 0)
        ))

        stock = get_next_stock()
        if stock is None:
            log_print('No stocks in GPT queue to process')
            return

        process_stock(
            stock['ticker'],
            stock['cik'],
            stock['attempt_count']
        )

        after = get_queue_stats()
        log_print('')
        log_print('Updated GPT Queue: pending={}, completed={}, failed={}'.format(
            after.get('pending', 0),
            after.get('completed', 0),
            after.get('failed', 0)
        ))

    finally:
        release_lock(lock_handle)

    log_print('')
    log_print('GPT Cron job completed')
    log_print('=' * 60)


# Script entry point (invoked directly by cron).
if __name__ == '__main__':
    main()
