import os
import psycopg2
import pandas as pd
from psycopg2.extras import execute_values
from datetime import datetime

# Adjust as per actual DB configuration (can also use python-dotenv)
DB_HOST = os.environ.get("DB_HOST", "127.0.0.1")
DB_PORT = os.environ.get("DB_PORT", "5432")
DB_NAME = os.environ.get("DB_DATABASE", "multitv_sms")
DB_USER = os.environ.get("DB_USERNAME", "postgres")
DB_PASSWORD = os.environ.get("DB_PASSWORD", "postgres")

def get_db_connection():
    return psycopg2.connect(
        host=DB_HOST,
        port=DB_PORT,
        dbname=DB_NAME,
        user=DB_USER,
        password=DB_PASSWORD
    )

def execute_feature_engineering():
    print("Starting Feature Engineering Engine...")
    conn = get_db_connection()
    
    # Query all active or recently expired subscriptions
    query = """
    WITH ord AS (
        SELECT subscriber_id, SUM(total) as actual_total_revenue 
        FROM tbl_order 
        WHERE status = 2 
        GROUP BY subscriber_id
    ),
    dev AS (
        SELECT customer_id, device_type 
        FROM (
            SELECT customer_id, device_type, ROW_NUMBER() OVER(PARTITION BY customer_id ORDER BY last_login DESC) as rn
            FROM customer_device
            WHERE device_type IS NOT NULL
        ) ranked
        WHERE rn = 1
    ),
    pkg AS (
        SELECT id, price as package_price
        FROM tbl_packages
    )
    SELECT 
        sub.sms_cust_id as customer_id, 
        sub.app_id, 
        sub.start_date, 
        sub.end_date, 
        COALESCE(ord.actual_total_revenue, sub.price) as total_revenue,
        COALESCE(dev.device_type, 'unknown') as device_type,
        sub.auto_renew
    FROM tbl_customer_subscriptions sub
    LEFT JOIN ord ON sub.sms_cust_id = ord.subscriber_id
    LEFT JOIN dev ON sub.sms_cust_id = dev.customer_id
    LEFT JOIN pkg ON sub.package_id = pkg.id
    """
    
    df = pd.read_sql_query(query, conn)
    
    # Feature 1: Days Until Expiry
    now = pd.Timestamp.now()
    df['end_date'] = pd.to_datetime(df['end_date'], errors='coerce')
    df['start_date'] = pd.to_datetime(df['start_date'], errors='coerce')
    
    df['days_until_expiry'] = (df['end_date'] - now).dt.days
    df['days_until_expiry'] = df['days_until_expiry'].fillna(-1).astype(int)
    
    # Feature 2: Tenure (Days since sign up)
    df['tenure_days'] = (now - df['start_date']).dt.days
    df['tenure_days'] = df['tenure_days'].fillna(0).astype(int)
    
    # Fallback missing data
    df['total_revenue'] = df['total_revenue'].fillna(0)
    df['device_type'] = df['device_type'].fillna('unknown')
    df['computed_at'] = datetime.now()
    
    # We will orchestrate writing these back out to churn_feature_store
    insert_query = """
        INSERT INTO churn_feature_store 
        (customer_id, app_id, days_until_expiry, total_revenue, device_type, computed_at)
        VALUES %s
        ON CONFLICT (customer_id, app_id) 
        DO UPDATE SET 
            days_until_expiry = EXCLUDED.days_until_expiry,
            total_revenue = EXCLUDED.total_revenue,
            computed_at = EXCLUDED.computed_at;
    """
    
    records = df[['customer_id', 'app_id', 'days_until_expiry', 'total_revenue', 'device_type', 'computed_at']].to_records(index=False)
    
    cursor = conn.cursor()
    execute_values(cursor, insert_query, records.tolist())
    conn.commit()
    
    print(f"Successfully processed and seeded {len(records)} feature rows into churn_feature_store.")
    
    cursor.close()
    conn.close()

if __name__ == "__main__":
    execute_feature_engineering()
