#!/usr/bin/env python3
"""
US Stock Analysis Cron Job
Processes one stock every 3 minutes from the queue
Runs via cron: */3 * * * *

Log file: /tmp/us_stock_cron.log
Lock file: /tmp/us_cron.lock
"""

import fcntl
import json
import os
import re
import sqlite3
import subprocess
import sys
import time
import uuid
from datetime import datetime, timedelta

# Add current directory to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from scraper_sec import get_all_metrics, get_company_submissions, SP500_TOP
from us_data_enrichment import (
    enrich_stock, build_claude_prompt, calculate_ratios,
    save_metrics_to_db, save_analysis_to_db
)
from us_error_logger import (
    log_error, log_warning, log_info, log_cron_start, log_cron_end
)

# Configuration
DB_PATH = '/home/mcmarketshost/public_html/us_stock_analysis.db'  # SQLite DB holding the queue and results tables
LOCK_FILE = '/tmp/us_cron.lock'  # flock target used to prevent overlapping cron runs
LOG_FILE = '/tmp/us_stock_cron.log'  # not written by this script directly — presumably cron redirects stdout here (see header)

# Retry configuration
MAX_RETRIES = 3  # attempts before a ticker is marked permanently 'failed'
RETRY_DELAY_HOURS = 24  # cool-down before a failed ticker goes back to 'pending'


def log_print(message):
    """Write *message* to stdout with a timestamp prefix and flush immediately.

    Output is expected to be captured into the log file by the cron redirect;
    the immediate flush keeps the log current even mid-run.
    """
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print('{} | {}'.format(stamp, message))
    sys.stdout.flush()


def get_db_conn():
    """Open a connection to the analysis database with dict-like row access."""
    connection = sqlite3.connect(DB_PATH)
    # sqlite3.Row lets callers index result rows by column name.
    connection.row_factory = sqlite3.Row
    return connection


def acquire_lock():
    """Try to take the exclusive cron lock without blocking.

    Returns the open lock-file handle on success (kept open so the flock
    persists), or None when another instance already holds the lock.
    """
    try:
        handle = open(LOCK_FILE, 'w')
        fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        handle.write(str(os.getpid()))
        handle.flush()
    except IOError:
        # LOCK_NB makes flock raise instead of waiting when the lock is taken.
        return None
    return handle


def release_lock(lock_file):
    """Release the cron lock and remove the lock file.

    Args:
        lock_file: Open handle returned by acquire_lock(), or None/falsy
            (in which case this is a no-op).

    Cleanup is deliberately best-effort: a missing file, closed descriptor,
    or similar OS-level problem is swallowed so the cron exit path never
    raises. The original bare ``except:`` also swallowed KeyboardInterrupt
    and SystemExit; narrowed to the exceptions these calls can raise
    (ValueError covers ``fileno()`` on an already-closed file).
    """
    if not lock_file:
        return
    try:
        fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
        lock_file.close()
        os.remove(LOCK_FILE)
    except (OSError, ValueError):
        # Best-effort cleanup; failure here is harmless for the next run.
        pass


def get_next_stock():
    """Fetch the highest-priority pending queue entry that is due to run.

    Returns a dict with ticker/cik/priority/attempt_count, or None when
    nothing is ready (empty queue or all retries still cooling down).
    """
    conn = get_db_conn()
    cursor = conn.cursor()

    now = datetime.now().isoformat()

    # Lowest priority number wins; ticker order breaks ties deterministically.
    cursor.execute('''
        SELECT ticker, cik, priority, attempt_count
        FROM us_processing_queue
        WHERE status = 'pending'
          AND (next_run_after IS NULL OR next_run_after <= ?)
        ORDER BY priority ASC, ticker ASC
        LIMIT 1
    ''', (now,))

    row = cursor.fetchone()
    conn.close()

    if row is None:
        return None
    return {field: row[field]
            for field in ('ticker', 'cik', 'priority', 'attempt_count')}


def mark_processing(ticker):
    """Flag *ticker* as in-flight and record when this attempt started."""
    stamp = datetime.now().isoformat()
    conn = get_db_conn()
    conn.cursor().execute('''
        UPDATE us_processing_queue
        SET status = 'processing', last_attempt = ?
        WHERE ticker = ?
    ''', (stamp, ticker))
    conn.commit()
    conn.close()


def mark_completed(ticker):
    """Record a successful run: bump the attempt counter, clear any error."""
    conn = get_db_conn()
    conn.cursor().execute('''
        UPDATE us_processing_queue
        SET status = 'completed',
            attempt_count = attempt_count + 1,
            last_error = NULL
        WHERE ticker = ?
    ''', (ticker,))
    conn.commit()
    conn.close()


def mark_skipped(ticker, reason):
    """Permanently exclude *ticker* from processing (structural issue, no retry).

    The reason is stored in last_error with a 'Skipped: ' prefix and any
    scheduled retry time is cleared.
    """
    annotated = 'Skipped: ' + reason
    conn = get_db_conn()
    conn.cursor().execute('''
        UPDATE us_processing_queue
        SET status = 'skipped',
            last_error = ?,
            next_run_after = NULL
        WHERE ticker = ?
    ''', (annotated, ticker))
    conn.commit()
    conn.close()


def mark_failed(ticker, error_message, attempt_count):
    """Record a failed attempt for *ticker*.

    While under MAX_RETRIES the entry goes back to 'pending' with a
    next_run_after timestamp RETRY_DELAY_HOURS in the future; once the
    limit is hit the entry becomes a terminal 'failed'.
    """
    bumped = attempt_count + 1
    conn = get_db_conn()
    cursor = conn.cursor()

    if bumped < MAX_RETRIES:
        # Still have retries left — schedule the next try after the cool-down.
        retry_at = (datetime.now() + timedelta(hours=RETRY_DELAY_HOURS)).isoformat()
        cursor.execute('''
            UPDATE us_processing_queue
            SET status = 'pending',
                attempt_count = ?,
                last_error = ?,
                next_run_after = ?
            WHERE ticker = ?
        ''', (bumped, error_message, retry_at, ticker))
    else:
        # Out of retries — leave a permanent failure record.
        cursor.execute('''
            UPDATE us_processing_queue
            SET status = 'failed',
                attempt_count = ?,
                last_error = ?
            WHERE ticker = ?
        ''', (bumped, error_message, ticker))

    conn.commit()
    conn.close()


def _extract_json(response_text):
    """Pull the first parseable JSON object out of Claude's raw output.

    Candidates are tried in order: fenced ```json code blocks, a flat object
    containing a "rating" key, then the entire response. Returns the parsed
    dict, or None when nothing parses to a JSON *object* (non-dict top-level
    values such as arrays are rejected — the original code would have raised
    a TypeError on them).
    """
    candidates = re.findall(r'```(?:json)?\s*(\{[\s\S]*?\})\s*```', response_text)
    candidates += re.findall(r'\{[^{}]*"rating"[^{}]*\}', response_text, re.DOTALL)
    candidates.append(response_text)

    for candidate in candidates:
        try:
            data = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(data, dict):
            return data
    return None


def call_claude_cli(prompt, timeout=120, ticker=None):
    """Run the Claude CLI on *prompt* and return the parsed JSON analysis.

    Args:
        prompt: Full analysis prompt text passed as the final CLI argument.
        timeout: Seconds to wait for the subprocess before aborting.
        ticker: Not used by this function; accepted for call-site symmetry.

    Returns:
        The parsed analysis dict with 'status' set to 'ok', or
        {'status': 'error', 'error': <message>} on any failure
        (non-zero exit, timeout, unparseable output, missing binary, ...).
    """
    system_prompt = "You are a financial analyst. Analyze the provided SEC financial data and respond with valid JSON only. No markdown, no explanations."
    cmd = ["claude", "-p", "--model", "haiku", "--system-prompt", system_prompt, prompt]

    # Strip Claude/MCP/Anthropic session variables so the CLI starts clean.
    env = os.environ.copy()
    for key in [k for k in env
                if 'CLAUDE' in k.upper() or 'MCP' in k.upper() or 'ANTHROPIC' in k.upper()]:
        env.pop(key, None)

    try:
        result = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
            timeout=timeout,
            cwd=os.path.expanduser("~"),
            env=env
        )

        if result.returncode != 0:
            return {'status': 'error',
                    'error': result.stderr.strip() or 'Unknown error'}

        data = _extract_json(result.stdout.strip())
        if data is None:
            return {'status': 'error', 'error': 'Could not parse JSON response'}
        data['status'] = 'ok'
        return data

    except subprocess.TimeoutExpired:
        return {'status': 'error', 'error': 'Timeout'}
    except Exception as e:
        return {'status': 'error', 'error': str(e)}


def process_stock(ticker, cik, attempt_count):
    """Run the full pipeline for one stock: enrich -> save metrics -> Claude -> save analysis.

    Args:
        ticker: Stock symbol to process.
        cik: SEC Central Index Key, stored alongside the analysis.
        attempt_count: Number of prior attempts (used for retry bookkeeping).

    Returns:
        True when the analysis was produced and saved, False on skip or
        any failure (the queue row is updated accordingly in every case).
    """
    run_id = str(uuid.uuid4())[:8]
    start_time = time.time()

    def _fail(error_msg):
        """Record a retryable failure in log + queue + run record; return False."""
        log_print('  ERROR: {}'.format(error_msg))
        mark_failed(ticker, error_msg, attempt_count)
        log_cron_end(run_id, success=False, error_message=error_msg)
        return False

    log_print('Processing {} (CIK: {}, attempt: {})'.format(ticker, cik, attempt_count + 1))
    log_cron_start(run_id, ticker)

    try:
        mark_processing(ticker)

        log_print('  Enriching data...')
        enriched = enrich_stock(ticker)

        if enriched['status'] == 'skipped':
            # Structural reason (per mark_skipped) — excluded permanently.
            reason = enriched.get('reason', 'unknown reason')
            log_print('  SKIPPED: {}'.format(reason))
            mark_skipped(ticker, reason)
            log_cron_end(run_id, success=False, error_message='Skipped: ' + reason)
            return False

        if enriched['status'] != 'ok':
            return _fail(enriched.get('error', 'Enrichment failed'))

        log_print('  Saving metrics...')
        save_metrics_to_db(ticker, enriched['ratios'], enriched.get('growth', {}))

        log_print('  Building Claude prompt...')
        prompt = build_claude_prompt(enriched)
        if not prompt:
            return _fail('Failed to build prompt')

        log_print('  Calling Claude...')
        analysis = call_claude_cli(prompt, ticker=ticker)
        if analysis['status'] != 'ok':
            return _fail(analysis.get('error', 'Claude analysis failed'))

        # Minimal schema validation before persisting.
        if 'rating' not in analysis:
            return _fail('Invalid response: missing rating')

        processing_time_ms = int((time.time() - start_time) * 1000)

        log_print('  Saving analysis...')
        save_analysis_to_db(
            ticker,
            cik,
            analysis,
            enriched['ratios'],
            processing_time_ms
        )

        mark_completed(ticker)

        log_print('  SUCCESS: {} | Confidence: {}% | Time: {:.1f}s'.format(
            analysis.get('rating'),
            analysis.get('confidence'),
            processing_time_ms / 1000
        ))
        log_cron_end(run_id, success=True)
        return True

    except Exception as e:
        # Any unexpected error is logged and scheduled for retry.
        error_msg = str(e)
        log_print('  EXCEPTION: {}'.format(error_msg))
        log_error('cron', error_msg, ticker=ticker, exception=e)
        mark_failed(ticker, error_msg, attempt_count)
        log_cron_end(run_id, success=False, error_message=error_msg)
        return False


def get_queue_stats():
    """Return a mapping of queue status name -> number of rows in that status."""
    conn = get_db_conn()
    cursor = conn.cursor()
    cursor.execute('''
        SELECT status, COUNT(*) as count
        FROM us_processing_queue
        GROUP BY status
    ''')
    rows = cursor.fetchall()
    conn.close()
    return {entry['status']: entry['count'] for entry in rows}


def main():
    """Cron entry point: lock, report queue state, process one stock, unlock."""
    log_print('=' * 60)
    log_print('US Stock Analysis Cron Job Started')
    log_print('=' * 60)

    # Refuse to run alongside another instance.
    lock_file = acquire_lock()
    if lock_file is None:
        log_print('ERROR: Another instance is already running')
        sys.exit(1)

    try:
        stats = get_queue_stats()
        log_print('Queue: pending={}, processing={}, completed={}, failed={}, skipped={}'.format(
            *(stats.get(key, 0)
              for key in ('pending', 'processing', 'completed', 'failed', 'skipped'))
        ))

        stock = get_next_stock()
        if stock is None:
            log_print('No stocks in queue to process')
            log_print('Run "POST /us/api/refresh-universe" to populate the queue')
            return

        process_stock(
            stock['ticker'],
            stock['cik'],
            stock['attempt_count']
        )

        # Re-read stats so the log shows the effect of this run.
        stats = get_queue_stats()
        log_print('')
        log_print('Updated Queue: pending={}, completed={}, failed={}, skipped={}'.format(
            *(stats.get(key, 0)
              for key in ('pending', 'completed', 'failed', 'skipped'))
        ))

    finally:
        release_lock(lock_file)

    log_print('')
    log_print('Cron job completed')
    log_print('=' * 60)


# Run one cron cycle when invoked directly (crontab: */3 * * * *).
if __name__ == '__main__':
    main()
