# verify_categories.py
"""
Script to verify and show category distribution after update
"""

import sys
import os
from sqlalchemy import create_engine, text
from tabulate import tabulate
import logging

# Add backend directory to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from config import settings

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

def verify_categories():
    """Show detailed category distribution"""
    
    engine = create_engine(settings.DATABASE_URL)
    
    with engine.connect() as conn:
        # Overall category distribution
        print("\n" + "="*80)
        print("OVERALL CATEGORY DISTRIBUTION")
        print("="*80)
        
        query = text("""
            SELECT 
                COALESCE(category, 'uncategorized') as category,
                COUNT(*) as activity_count,
                COUNT(DISTINCT developer_id) as developers,
                ROUND(SUM(duration) / 3600.0, 2) as total_hours,
                ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 2) as percentage
            FROM activity_records
            GROUP BY category
            ORDER BY activity_count DESC
        """)
        
        result = conn.execute(query).fetchall()
        
        headers = ["Category", "Activities", "Developers", "Hours", "Percentage"]
        print(tabulate(result, headers=headers, tablefmt="grid"))
        
        # Productive subcategories
        print("\n" + "="*80)
        print("PRODUCTIVE SUBCATEGORIES")
        print("="*80)
        
        query = text("""
            SELECT 
                COALESCE(subcategory, 'general') as subcategory,
                COUNT(*) as count,
                ROUND(SUM(duration) / 3600.0, 2) as hours
            FROM activity_records
            WHERE category = 'productive'
            GROUP BY subcategory
            ORDER BY count DESC
            LIMIT 15
        """)
        
        result = conn.execute(query).fetchall()
        
        headers = ["Subcategory", "Count", "Hours"]
        print(tabulate(result, headers=headers, tablefmt="grid"))
        
        # Sample activities for each category
        print("\n" + "="*80)
        print("SAMPLE ACTIVITIES BY CATEGORY")
        print("="*80)
        
        categories = ['productive', 'browser', 'server', 'non-work']
        
        for category in categories:
            print(f"\n--- {category.upper()} ---")
            
            query = text("""
                SELECT 
                    LEFT(window_title, 80) as title,
                    application_name as app,
                    subcategory
                FROM activity_records
                WHERE category = :category
                AND window_title IS NOT NULL
                ORDER BY RANDOM()
                LIMIT 5
            """)
            
            result = conn.execute(query, {"category": category}).fetchall()
            
            for row in result:
                title = row[0] if row[0] else "No title"
                app = row[1] if row[1] else "No app"
                subcat = row[2] if row[2] else "general"
                print(f"  • {title}")
                print(f"    App: {app} | Subcategory: {subcat}")
        
        # Activities that might need review
        print("\n" + "="*80)
        print("UNCATEGORIZED ACTIVITIES (Need Review)")
        print("="*80)
        
        query = text("""
            SELECT 
                LEFT(window_title, 60) as title,
                application_name as app,
                COUNT(*) as count
            FROM activity_records
            WHERE category IS NULL
            GROUP BY window_title, application_name
            ORDER BY count DESC
            LIMIT 10
        """)
        
        result = conn.execute(query).fetchall()
        
        if result:
            headers = ["Window Title", "Application", "Count"]
            print(tabulate(result, headers=headers, tablefmt="grid"))
        else:
            print("✓ All activities are categorized!")

def show_productivity_by_developer():
    """Show productivity statistics by developer"""
    
    engine = create_engine(settings.DATABASE_URL)
    
    with engine.connect() as conn:
        print("\n" + "="*80)
        print("PRODUCTIVITY BY DEVELOPER")
        print("="*80)
        
        query = text("""
            WITH developer_stats AS (
                SELECT 
                    developer_id,
                    category,
                    SUM(duration) as category_duration
                FROM activity_records
                WHERE category IS NOT NULL
                GROUP BY developer_id, category
            ),
            developer_totals AS (
                SELECT 
                    developer_id,
                    SUM(duration) as total_duration
                FROM activity_records
                GROUP BY developer_id
            )
            SELECT 
                ds.developer_id,
                ROUND(dt.total_duration / 3600.0, 2) as total_hours,
                ROUND(SUM(CASE WHEN ds.category = 'productive' THEN ds.category_duration ELSE 0 END) / 3600.0, 2) as productive_hours,
                ROUND(SUM(CASE WHEN ds.category = 'browser' THEN ds.category_duration ELSE 0 END) / 3600.0, 2) as browser_hours,
                ROUND(SUM(CASE WHEN ds.category = 'server' THEN ds.category_duration ELSE 0 END) / 3600.0, 2) as server_hours,
                ROUND(100.0 * SUM(CASE WHEN ds.category = 'productive' THEN ds.category_duration ELSE 0 END) / dt.total_duration, 2) as productivity_percentage
            FROM developer_stats ds
            JOIN developer_totals dt ON ds.developer_id = dt.developer_id
            GROUP BY ds.developer_id, dt.total_duration
            ORDER BY productivity_percentage DESC
        """)
        
        result = conn.execute(query).fetchall()
        
        headers = ["Developer", "Total Hours", "Productive", "Browser", "Server", "Productivity %"]
        print(tabulate(result, headers=headers, tablefmt="grid"))

if __name__ == "__main__":
    verify_categories()
    show_productivity_by_developer()