#!/usr/bin/env python3
"""
US Stock Post-Earnings Reanalysis Cron
Checks stocks that reported earnings 2-45 days ago for fresh SEC EDGAR data.
If new 10-Q/10-K data is available, triggers Claude reanalysis via the API.

Schedule: Daily at 6:00 AM IST (place BEFORE CRON_TZ line in crontab)
  0 6 * * * cd /home/mcmarketshost/public_html && /bin/python3 us_reanalysis_cron.py >> /tmp/us_reanalysis_cron.log 2>&1

Log file: /tmp/us_reanalysis_cron.log
Lock file: /tmp/us_reanalysis_cron.lock
"""

import os
import sys
import json
import sqlite3
import time
import fcntl
import requests
from datetime import datetime, timedelta

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from scraper_sec import get_all_metrics, ticker_to_cik
from us_error_logger import log_info, log_error

# Configuration
DB_PATH = '/home/mcmarketshost/public_html/us_stock_analysis.db'
LOCK_FILE = '/tmp/us_reanalysis_cron.lock'
LOG_FILE = '/tmp/us_reanalysis_cron.log'
API_BASE = 'http://127.0.0.1:5051'
API_TIMEOUT = 300             # seconds to wait for one full Claude analysis call
SLEEP_BETWEEN_ANALYSES = 25   # seconds of pacing after each successful analysis
LOOKBACK_START_DAYS = 45      # oldest earnings date considered (days ago)
LOOKBACK_END_DAYS = 2         # newest earnings date considered (days ago)

# Cloudflare cache purge.
# SECURITY NOTE: the auth email/token were previously hard-coded only.
# They can now be rotated via the CF_AUTH_EMAIL / CF_AUTH_TOKEN environment
# variables without a code change; the in-source values remain as fallbacks
# for backward compatibility and should be removed once the environment
# variables are deployed.
CF_PURGE_URL = 'https://api.cloudflare.com/client/v4/zones/d7219bce2202091079af508ede5c219a/purge_cache'
CF_AUTH_EMAIL = os.environ.get('CF_AUTH_EMAIL', 'christopher.selvin@gmail.com')
CF_AUTH_TOKEN = os.environ.get('CF_AUTH_TOKEN', '01d8844cf296960e36b9750066d90dcec7101')
CF_PURGE_BATCH = 30           # max URLs sent per purge request

# Numeric scale used to average Claude and GPT ratings (5 = best).
RATING_VALUES = {
    'STRONG_BUY': 5,
    'BUY': 4,
    'HOLD': 3,
    'SELL': 2,
    'STRONG_SELL': 1
}
# Inverse mapping: averaged numeric value back to a rating label.
VALUE_TO_RATING = {v: k for k, v in RATING_VALUES.items()}


def log_print(message):
    """Write a timestamped line to stdout (crontab's >> redirect captures it)."""
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print('{} | {}'.format(stamp, message))
    sys.stdout.flush()


def get_db_conn():
    """Open the analysis DB with dict-like rows; waits up to 10s if locked."""
    connection = sqlite3.connect(DB_PATH, timeout=10)
    connection.row_factory = sqlite3.Row
    return connection


def acquire_lock():
    """
    Acquire an exclusive non-blocking lock to prevent concurrent cron runs.

    Returns the open lock file object on success (keep it alive for the
    duration of the run), or None if another instance holds the lock.
    """
    lock_file = None
    try:
        lock_file = open(LOCK_FILE, 'w')
        fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        # Record our PID for debugging stale locks.
        lock_file.write(str(os.getpid()))
        lock_file.flush()
        return lock_file
    except IOError:
        # BUGFIX: if open() succeeded but flock() raised (another instance
        # holds the lock), the previous code leaked the file descriptor on
        # every blocked run. Close it before giving up.
        if lock_file is not None:
            try:
                lock_file.close()
            except Exception:
                pass
        return None

def release_lock(lock_file):
    """Best-effort teardown of the cron lock: unlock, close, and unlink."""
    if lock_file:
        try:
            fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
            lock_file.close()
            os.remove(LOCK_FILE)
        except Exception:
            # Teardown failures are harmless; the next run re-creates the lock.
            pass


def check_server_health():
    """Return True only when the local API answers /api/health with status 'ok'."""
    try:
        resp = requests.get('{}/api/health'.format(API_BASE), timeout=10)
    except Exception:
        return False
    if resp.status_code != 200:
        return False
    try:
        return resp.json().get('status') == 'ok'
    except Exception:
        return False


def get_earnings_tickers(lookback_start, lookback_end):
    """
    Return one dict per distinct ticker that reported earnings between
    lookback_start and lookback_end days ago, carrying the earliest
    earnings date inside that window, ordered oldest first.
    """
    date_from = (datetime.now() - timedelta(days=lookback_start)).strftime('%Y-%m-%d')
    date_to = (datetime.now() - timedelta(days=lookback_end)).strftime('%Y-%m-%d')

    conn = get_db_conn()
    cursor = conn.cursor()
    cursor.execute('''
        SELECT ticker, MIN(earnings_date) as earliest_earnings
        FROM us_earnings_calendar
        WHERE earnings_date >= ? AND earnings_date <= ?
        GROUP BY ticker
        ORDER BY earliest_earnings ASC
    ''', (date_from, date_to))
    rows = cursor.fetchall()
    conn.close()

    return [
        {'ticker': r['ticker'], 'earnings_date': r['earliest_earnings']}
        for r in rows
    ]


def get_latest_data_freshness(ticker):
    """
    Return data_freshness from the newest analysis row for a ticker,
    or None when no analysis exists or the column is NULL/empty.
    """
    conn = get_db_conn()
    cursor = conn.cursor()
    cursor.execute('''
        SELECT data_freshness
        FROM us_stock_analyses
        WHERE ticker = ?
        ORDER BY timestamp DESC
        LIMIT 1
    ''', (ticker,))
    record = cursor.fetchone()
    conn.close()

    if record is None:
        return None
    # Normalize NULL / empty string to None.
    return record['data_freshness'] or None


def get_sec_data_freshness(ticker):
    """
    Query SEC EDGAR for the most recent end_date across all metrics,
    mirroring the MAX(end_date) logic of calculate_ratios() in
    us_data_enrichment.py.

    Returns a YYYY-MM-DD string, or None when the CIK lookup or the
    metrics fetch fails (or no metric carries an end_date).
    """
    cik = ticker_to_cik(ticker)
    if not cik:
        return None

    payload = get_all_metrics(cik)
    if payload.get('status') != 'ok':
        return None

    # ISO dates compare correctly as strings, so max() over the strings
    # yields the chronologically latest end_date.
    end_dates = [
        m['end_date']
        for m in payload.get('metrics', {}).values()
        if m and m.get('end_date')
    ]
    return max(end_dates) if end_dates else None


def trigger_reanalysis(ticker):
    """
    POST to the API to run a full Claude reanalysis for a ticker.

    Returns the API's JSON dict ('status' of 'ok', 'skipped', or 'error';
    on success also 'rating', 'confidence', 'processing_time_ms'), or a
    synthesized error dict when the HTTP call itself fails.
    """
    endpoint = '{}/api/run-analysis/{}'.format(API_BASE, ticker)
    try:
        return requests.post(endpoint, timeout=API_TIMEOUT).json()
    except requests.exceptions.Timeout:
        return {'status': 'error', 'error': 'HTTP timeout ({}s)'.format(API_TIMEOUT)}
    except requests.exceptions.ConnectionError:
        return {'status': 'error', 'error': 'Connection refused - server down?'}
    except Exception as exc:
        return {'status': 'error', 'error': str(exc)}


def purge_cloudflare_cache(tickers):
    """Ask Cloudflare to drop cached stock-page URLs for the given tickers."""
    if not tickers:
        return
    headers = {
        'X-Auth-Email': CF_AUTH_EMAIL,
        'X-Auth-Key': CF_AUTH_TOKEN,
        'Content-Type': 'application/json',
    }
    pending = ['https://www.marketshost.com/ai-score/us/{}'.format(t.lower()) for t in tickers]
    while pending:
        # Purge in batches to stay within the per-request URL limit.
        batch, pending = pending[:CF_PURGE_BATCH], pending[CF_PURGE_BATCH:]
        try:
            resp = requests.post(CF_PURGE_URL, json={'files': batch},
                                 headers=headers, timeout=15)
            body = resp.json()
            if body.get('success'):
                log_print('  CF purge: {} URLs purged'.format(len(batch)))
            else:
                log_print('  CF purge failed: {}'.format(body.get('errors', [])))
        except Exception as exc:
            log_print('  CF purge error: {}'.format(str(exc)))


def update_combined_scores(ticker):
    """
    Recalculate combined scores after a Claude reanalysis, using the same
    50/50 averaging formula as us_gpt_analysis_cron.py. When no GPT
    analysis exists, the combined score is the Claude rating directly.

    Returns True when a combined row was written, False when no Claude
    analysis exists for the ticker.
    """
    conn = get_db_conn()
    cursor = conn.cursor()

    # Latest Claude analysis — required; bail out if missing.
    cursor.execute('''
        SELECT rating, confidence
        FROM us_stock_analyses
        WHERE ticker = ?
        ORDER BY timestamp DESC
        LIMIT 1
    ''', (ticker,))
    claude = cursor.fetchone()
    if claude is None:
        conn.close()
        return False

    claude_rating = claude['rating']
    claude_conf = claude['confidence'] or 50  # default NULL/0 confidence to 50

    # Latest GPT analysis — optional.
    cursor.execute('''
        SELECT rating, confidence
        FROM us_gpt_analyses
        WHERE ticker = ?
        ORDER BY timestamp DESC
        LIMIT 1
    ''', (ticker,))
    gpt = cursor.fetchone()

    if gpt is None:
        # No GPT analysis — combined mirrors Claude, agreement is 'N/A'.
        gpt_rating = None
        gpt_conf = None
        combined_rating = claude_rating
        combined_conf = claude_conf
        agreement = 'N/A'
    else:
        gpt_rating = gpt['rating']
        gpt_conf = gpt['confidence'] or 50

        numeric_claude = RATING_VALUES.get(claude_rating, 3)
        numeric_gpt = RATING_VALUES.get(gpt_rating, 3)

        combined_rating = VALUE_TO_RATING.get(
            round((numeric_claude + numeric_gpt) / 2.0), 'HOLD')
        combined_conf = int(round((claude_conf + gpt_conf) / 2.0))

        # Rating-gap of 0/1/2 maps to the agreement tiers; wider gaps
        # count as disagreement.
        agreement = {
            0: 'STRONG_AGREEMENT',
            1: 'AGREEMENT',
            2: 'NEUTRAL',
        }.get(abs(numeric_claude - numeric_gpt), 'DISAGREEMENT')

    cursor.execute('''
            INSERT OR REPLACE INTO us_combined_scores
            (ticker, claude_rating, claude_confidence, gpt_rating, gpt_confidence,
             combined_rating, combined_confidence, agreement_level, last_updated)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        ''', (
        ticker,
        claude_rating,
        claude_conf,
        gpt_rating,
        gpt_conf,
        combined_rating,
        combined_conf,
        agreement,
        datetime.now().isoformat()
    ))

    conn.commit()
    conn.close()
    return True


def main():
    """
    Main cron entry point.

    Flow: acquire a single-instance lock, verify the local API is up, then
    for each ticker that reported earnings in the lookback window compare
    the stored analysis data_freshness against SEC EDGAR and trigger a
    Claude reanalysis when EDGAR has newer filing data. Finishes with a
    summary, a Cloudflare cache purge for updated pages, and lock release.
    """
    log_print('=' * 60)
    log_print('US Stock Post-Earnings Reanalysis Cron Started')
    log_print('=' * 60)

    # Acquire lock — exit immediately if another instance holds it.
    lock_file = acquire_lock()
    if not lock_file:
        log_print('ERROR: Another instance is already running')
        sys.exit(1)

    try:
        # Check server health before doing any work; nothing can run
        # without the analysis API.
        if not check_server_health():
            log_print('ERROR: API server not responding at {}'.format(API_BASE))
            log_error('reanalysis', 'API server not responding')
            return

        # Get earnings tickers from the lookback window
        tickers = get_earnings_tickers(LOOKBACK_START_DAYS, LOOKBACK_END_DAYS)
        log_print('Earnings tickers in {}-{} day window: {}'.format(
            LOOKBACK_END_DAYS, LOOKBACK_START_DAYS, len(tickers)))

        if not tickers:
            log_print('No earnings tickers to check')
            return

        # Counters for the end-of-run summary.
        checked = 0              # tickers whose SEC freshness was actually queried
        reanalyzed = 0           # successful Claude reanalyses
        skipped_null = 0         # no prior analysis / NULL data_freshness
        skipped_no_new = 0       # SEC has nothing newer than the stored freshness
        skipped_sec_error = 0    # CIK lookup or EDGAR fetch failed
        skipped_foreign = 0      # API declined the ticker (status 'skipped')
        failed = 0               # API returned an error
        reanalyzed_tickers = []  # collected for the Cloudflare purge

        for item in tickers:
            ticker = item['ticker']
            earnings_date = item['earnings_date']

            # Step 1: Get current analysis data_freshness
            current_freshness = get_latest_data_freshness(ticker)
            if current_freshness is None:
                skipped_null += 1
                continue

            # Step 2: Check SEC EDGAR for newer data
            sec_freshness = get_sec_data_freshness(ticker)
            checked += 1

            if sec_freshness is None:
                skipped_sec_error += 1
                continue

            # Both values are YYYY-MM-DD strings, so lexicographic
            # comparison matches chronological order.
            if sec_freshness <= current_freshness:
                skipped_no_new += 1
                continue

            # Step 3: Fresh data available — trigger reanalysis
            log_print('REANALYZE: {} | Earnings: {} | Old: {} | New SEC: {}'.format(
                ticker, earnings_date, current_freshness, sec_freshness))

            result = trigger_reanalysis(ticker)

            if result.get('status') == 'ok':
                reanalyzed += 1
                reanalyzed_tickers.append(ticker)
                log_print('  SUCCESS: {} | {} | {}% | {:.1f}s'.format(
                    ticker,
                    result.get('rating'),
                    result.get('confidence'),
                    result.get('processing_time_ms', 0) / 1000.0
                ))

                # Update combined scores to reflect new Claude rating
                if update_combined_scores(ticker):
                    log_print('  Combined scores updated for {}'.format(ticker))
                else:
                    log_print('  WARNING: Could not update combined scores for {}'.format(ticker))

            elif result.get('status') == 'skipped':
                skipped_foreign += 1
                log_print('  SKIPPED: {} | {}'.format(
                    ticker, result.get('reason', 'unknown')))

            else:
                failed += 1
                log_print('  FAILED: {} | {}'.format(
                    ticker, result.get('error', 'unknown error')))
                log_error('reanalysis', result.get('error', 'unknown'),
                          ticker=ticker)

            # Pace between analysis calls (not needed after skips/failures)
            if result.get('status') == 'ok':
                time.sleep(SLEEP_BETWEEN_ANALYSES)

        # Summary
        log_print('')
        log_print('=' * 60)
        log_print('SUMMARY')
        log_print('  Tickers in window:     {}'.format(len(tickers)))
        log_print('  SEC freshness checked: {}'.format(checked))
        log_print('  Reanalyzed:            {}'.format(reanalyzed))
        log_print('  Skipped (NULL data):   {}'.format(skipped_null))
        log_print('  Skipped (no new SEC):  {}'.format(skipped_no_new))
        log_print('  Skipped (SEC error):   {}'.format(skipped_sec_error))
        log_print('  Skipped (foreign):     {}'.format(skipped_foreign))
        log_print('  Failed:                {}'.format(failed))
        log_print('=' * 60)

        # Purge Cloudflare cache for reanalyzed pages so visitors see the
        # updated ratings immediately.
        if reanalyzed_tickers:
            log_print('Purging Cloudflare cache for {} reanalyzed pages...'.format(
                len(reanalyzed_tickers)))
            purge_cloudflare_cache(reanalyzed_tickers)

        log_info('reanalysis',
                 'Cron complete: {} reanalyzed, {} checked, {} failed'.format(
                     reanalyzed, checked, failed))

    except Exception as e:
        # Top-level boundary: log the fatal error; the finally clause
        # still releases the lock.
        log_print('FATAL ERROR: {}'.format(str(e)))
        log_error('reanalysis', str(e), exception=e)

    finally:
        release_lock(lock_file)

    log_print('')
    log_print('Cron job completed')
    log_print('=' * 60)


# Script entry point (see module docstring for the crontab schedule).
if __name__ == '__main__':
    main()
