#!/usr/bin/env python3
"""
US Stock Earnings Calendar Cron
Fetches upcoming earnings dates from NASDAQ public API for the next 14 days
and upserts into us_earnings_calendar table.

Runs daily at 01:00 IST via crontab.
Log file: /tmp/us_earnings_cron.log
Lock file: /tmp/us_earnings_cron.lock
"""

import os
import sys
import json
import sqlite3
import fcntl
import time
import urllib.request
import urllib.error
from datetime import datetime, timedelta
import pytz

# NASDAQ publishes calendar dates in US market (Eastern) time.
EASTERN = pytz.timezone('America/New_York')

# SQLite database holding us_stock_analyses (read) and us_earnings_calendar (written).
DB_PATH = '/home/mcmarketshost/public_html/us_stock_analysis.db'
LOG_FILE = '/tmp/us_earnings_cron.log'
LOCK_FILE = '/tmp/us_earnings_cron.lock'
# {date} placeholder is filled with an Eastern YYYY-MM-DD date string.
NASDAQ_URL = 'https://api.nasdaq.com/api/calendar/earnings?date={date}'
# Browser-like UA: the NASDAQ API rejects default urllib user agents.
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36'
DAYS_AHEAD = 14
RETENTION_DAYS = 50  # must exceed reanalysis LOOKBACK_START_DAYS (45)


def log(msg):
    """Emit *msg* with a timestamp to stdout and best-effort append to LOG_FILE."""
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    entry = '{} | {}'.format(stamp, msg)
    print(entry)
    sys.stdout.flush()
    # The file copy is best-effort: a full /tmp must not kill the cron run.
    try:
        with open(LOG_FILE, 'a') as handle:
            handle.write(entry + '\n')
    except Exception:
        pass


def acquire_lock():
    """Take an exclusive non-blocking flock on LOCK_FILE.

    Returns the open file object that holds the lock (keep it alive for the
    lifetime of the run), or None if another instance already holds the lock
    or the file cannot be opened.
    """
    f = None
    try:
        f = open(LOCK_FILE, 'w')
        fcntl.flock(f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        f.write(str(os.getpid()))  # record owner PID for debugging
        f.flush()
        return f
    except OSError:
        # BlockingIOError (lock already held) is an OSError subclass.
        # Close the handle so a failed attempt doesn't leak a descriptor.
        if f is not None:
            f.close()
        return None


def release_lock(f):
    """Release the flock held via *f* and close the handle.

    The lock file is deliberately left on disk: deleting it opens a race
    where one process flocks the old (unlinked) inode while another creates
    and flocks a fresh file, allowing two instances to run concurrently.
    acquire_lock() opens with 'w' and truncates, so no cleanup is required.
    """
    if not f:
        return
    try:
        fcntl.flock(f.fileno(), fcntl.LOCK_UN)
        f.close()
    except Exception:
        pass  # best-effort: the kernel drops the lock at process exit anyway


def ensure_table(conn):
    """Create the us_earnings_calendar table and its date index if missing."""
    schema = '''
        CREATE TABLE IF NOT EXISTS us_earnings_calendar (
            ticker TEXT NOT NULL,
            earnings_date DATE NOT NULL,
            company_name TEXT,
            earnings_time TEXT,
            eps_estimate REAL,
            eps_prior REAL,
            num_estimates INTEGER,
            fiscal_quarter TEXT,
            market_cap REAL,
            last_updated DATETIME DEFAULT CURRENT_TIMESTAMP,
            PRIMARY KEY (ticker, earnings_date)
        )
    '''
    date_index = 'CREATE INDEX IF NOT EXISTS idx_earnings_date ON us_earnings_calendar(earnings_date)'
    conn.execute(schema)
    conn.execute(date_index)
    conn.commit()


def parse_dollar(s):
    """Parse a NASDAQ dollar string (e.g. '$1,234.56') into a float.

    Also handles accounting-style negatives such as '($0.12)', which NASDAQ
    uses for negative EPS values — the original code dropped these to None.
    Returns None for missing, 'N/A', or unparseable input.
    """
    if not s or s == 'N/A':
        return None
    try:
        text = str(s).strip()
    except Exception:
        return None
    # Parentheses denote a negative amount in accounting notation.
    negative = text.startswith('(') and text.endswith(')')
    if negative:
        text = text[1:-1]
    text = text.replace('$', '').replace(',', '').strip()
    try:
        value = float(text)
    except ValueError:
        return None
    return -value if negative else value


def parse_int(s):
    """Parse a possibly comma-grouped integer string; None when missing or invalid."""
    if s is None or s == '':
        return None
    digits = str(s).replace(',', '').strip()
    try:
        return int(digits)
    except (ValueError, AttributeError):
        return None


def fetch_nasdaq(date_str, retries=2):
    """Fetch the NASDAQ earnings-calendar JSON payload for one date.

    Retries with exponential backoff (1s, 2s, ...) on network, timeout, or
    JSON decode errors. Returns the decoded dict, or None after all
    attempts fail (failure is logged).
    """
    url = NASDAQ_URL.format(date=date_str)
    headers = {
        'User-Agent': USER_AGENT,
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'en-US,en;q=0.9',
    }
    failure = None
    attempt = 0
    while attempt <= retries:
        try:
            request = urllib.request.Request(url, headers=headers)
            with urllib.request.urlopen(request, timeout=15) as response:
                return json.loads(response.read().decode('utf-8'))
        except (urllib.error.URLError, json.JSONDecodeError, TimeoutError) as exc:
            failure = exc
            if attempt < retries:
                time.sleep(2 ** attempt)
        attempt += 1
    log('  NASDAQ fetch failed for {}: {}'.format(date_str, failure))
    return None


def get_analyzed_tickers(conn):
    """Return the set of upper-cased tickers present in us_stock_analyses."""
    rows = conn.execute('SELECT DISTINCT ticker FROM us_stock_analyses')
    return {row[0].upper() for row in rows}


def upsert_earnings(conn, date_str, rows, analyzed):
    """Write one day's NASDAQ earnings rows into us_earnings_calendar.

    Only tickers present in *analyzed* (the AI-rated universe) are stored.
    Returns (upserted, matched) counts for logging; they are currently
    equal since every matched row is written.
    """
    cur = conn.cursor()
    upserted = 0
    matched = 0
    for entry in rows or []:
        ticker = (entry.get('symbol') or '').strip().upper()
        if not ticker or ticker not in analyzed:
            continue  # only store tickers we have AI ratings for
        matched += 1

        # Map NASDAQ's descriptive time strings onto our short codes.
        raw = (entry.get('time') or '').strip().lower()
        if 'pre-market' in raw:
            code = 'BMO'  # before market open
        elif 'after-hours' in raw:
            code = 'AMC'  # after market close
        elif not raw or 'not-supplied' in raw:
            code = None
        else:
            code = 'TNS'  # time not supplied / during market

        cur.execute('''
            INSERT INTO us_earnings_calendar
            (ticker, earnings_date, company_name, earnings_time, eps_estimate,
             eps_prior, num_estimates, fiscal_quarter, market_cap, last_updated)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
            ON CONFLICT(ticker, earnings_date) DO UPDATE SET
                company_name=excluded.company_name,
                earnings_time=excluded.earnings_time,
                eps_estimate=excluded.eps_estimate,
                eps_prior=excluded.eps_prior,
                num_estimates=excluded.num_estimates,
                fiscal_quarter=excluded.fiscal_quarter,
                market_cap=excluded.market_cap,
                last_updated=CURRENT_TIMESTAMP
        ''', (
            ticker,
            date_str,
            (entry.get('name') or '').strip(),
            code,
            parse_dollar(entry.get('epsForecast')),
            parse_dollar(entry.get('lastYearEPS')),
            parse_int(entry.get('noOfEsts')),
            (entry.get('fiscalQuarterEnding') or '').strip() or None,
            parse_dollar(entry.get('marketCap')),
        ))
        upserted += 1
    conn.commit()
    return upserted, matched


def purge_old(conn):
    """Delete calendar rows older than RETENTION_DAYS (Eastern date); return count removed."""
    today_eastern = datetime.now(EASTERN).date()
    cutoff = (today_eastern - timedelta(days=RETENTION_DAYS)).strftime('%Y-%m-%d')
    cur = conn.cursor()
    cur.execute('DELETE FROM us_earnings_calendar WHERE earnings_date < ?', (cutoff,))
    removed = cur.rowcount
    conn.commit()
    return removed


def main():
    """Entry point: fetch DAYS_AHEAD days of earnings, upsert, then purge old rows.

    Guarded by a flock so overlapping cron invocations exit immediately.
    """
    log('=' * 60)
    log('US Earnings Calendar Cron Started')
    log('=' * 60)

    lock = acquire_lock()
    if not lock:
        log('Another instance running, exiting.')
        return

    try:
        conn = sqlite3.connect(DB_PATH)
        ensure_table(conn)
        universe = get_analyzed_tickers(conn)
        log('Analyzed universe: {} tickers'.format(len(universe)))

        # NASDAQ calendar dates are Eastern.
        start = datetime.now(EASTERN).date()
        totals = {'upserted': 0, 'matched': 0, 'seen': 0, 'days': 0}

        for offset in range(DAYS_AHEAD):
            day = start + timedelta(days=offset)
            day_str = day.strftime('%Y-%m-%d')
            payload = fetch_nasdaq(day_str)
            if not payload:
                continue
            rows = ((payload.get('data') or {}).get('rows')) or []
            totals['seen'] += len(rows)
            if rows:
                totals['days'] += 1
            ins, hits = upsert_earnings(conn, day_str, rows, universe)
            totals['upserted'] += ins
            totals['matched'] += hits
            log('  {} ({}): {} entries from NASDAQ, {} matched, {} upserted'.format(
                day_str, day.strftime('%a'), len(rows), hits, ins))
            time.sleep(0.5)  # be polite to NASDAQ

        deleted = purge_old(conn)
        log('Purged {} entries older than {} days'.format(deleted, RETENTION_DAYS))

        log('Summary: {} days with data, {} NASDAQ entries seen, {} matched, {} upserted'.format(
            totals['days'], totals['seen'], totals['matched'], totals['upserted']))

        cur = conn.cursor()
        cur.execute('SELECT COUNT(*), MIN(earnings_date), MAX(earnings_date) FROM us_earnings_calendar')
        count, first, last = cur.fetchone()
        log('Table now has {} rows ({} to {})'.format(count, first, last))
        conn.close()
    except Exception as exc:
        log('FATAL: {}'.format(exc))
        import traceback
        log(traceback.format_exc())
    finally:
        release_lock(lock)
        log('Cron job completed')
        log('=' * 60 + '\n')


# Entry point when executed directly (e.g. from crontab).
if __name__ == '__main__':
    main()
