"""
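Keyword-based categorizer for window-title / application-name activity records.

Every activity is assigned exactly one of four categories:
1) PRODUCTIVE  → All work-related activities (also the fallback when nothing else matches)
2) BROWSER     → YouTube, Gmail, social, entertainment, search, ALL MAIL, generic pages
3) SERVER      → AWS, GCP, Azure, SSH, Docker, monitoring tools, Termius
4) NON-WORK    → Untitled windows, system lock / idle / AFK / screensaver (ActivityWatch)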
"""

import re
from typing import Dict, List, Tuple


class ActivityCategorizer:
    """Classify a window title / application name into one of four categories.

    The categories are ``"productive"``, ``"browser"``, ``"server"`` and
    ``"non-work"``.  Matching is a case-insensitive keyword search over the
    string ``"<window_title> <app_name>"``.

    Precedence (first hit wins):

    1. empty / untitled window title                    -> non-work
    2. new-tab / incognito window title                 -> browser
    3. dedicated SSH clients                            -> server
    4. FileZilla / WordPress admin                      -> productive
    5. lock / idle / screensaver keywords               -> non-work
    6. e-mail indicators and bare ``@``                 -> browser
    7. specific browser keywords (video, social, ...)   -> browser
    8. productive keywords (IDEs, source files, ...)    -> productive
    9. server keywords                                  -> server
    10. generic page words ("home", "dashboard", ...)   -> browser
    11. anything else                                   -> productive

    Generic page words are deliberately consulted last: they are a fallback
    for otherwise meaningless titles and must not override a specific signal
    such as ``main.py - Visual Studio Code`` or ``EC2 Dashboard``.

    The keyword lists are plain instance attributes and may be extended
    after construction.
    """

    # Keywords this short ("hi", "aws", "s3", "iam", ...) are matched as whole
    # tokens.  As plain substrings they fire inside unrelated words such as
    # "this", "laws", "css3" or "william".
    _SHORT_KEYWORD_MAX_LEN = 3

    def __init__(self):
        """Populate the keyword tables used by :meth:`categorize_activity`."""

        # 🟥 NON-WORK (Untitled + ActivityWatch Lock / Idle / AFK / Screensaver)
        self.non_work_keywords = [
            # Untitled windows
            "untitled", "new tab", "blank", "empty", "general",

            # Windows Lock Screen
            "lockapp.exe", "lockapp",
            "lock screen", "sessionlock", "session locked",
            "windows default lock screen",

            # Idle / AFK states
            "idle", "idle-time", "afk", "away",
            "not active", "userinactive",
            "no active window",

            # Screensaver
            "screensaver", "screen saver",
        ]

        # Generic / meaningless page titles.  Kept separate so they can be
        # applied as a low-priority fallback; they are also still part of
        # ``browser_keywords`` below.
        self.generic_page_keywords = [
            "home", "welcome", "search", "open", "loading",
            "start", "page", "default", "portal", "dashboard",
            "main", "index", "site", "web", "online",
            "hello", "hi", "greetings", "welcome page",
            "home page", "start page", "new page",
        ]

        # 🟧 BROWSER (non-productive browsing + ALL EMAIL + generic pages)
        self.browser_keywords = [
            # New tabs and incognito
            "new tab", "incognito", "new incognito tab",
            "private browsing", "inprivate",

            # Generic/meaningless page titles
            *self.generic_page_keywords,

            # Email - ALL variations
            "inbox", "compose", "mail", "email",
            "@firsteconomy.com", "@gmail.com", "@yahoo.com",
            "@outlook.com", "@hotmail.com",
            "first economy mail", "gmail", "yahoo mail", "outlook",
            "protonmail", "thunderbird", "webmail",

            # Video / Entertainment
            "youtube", "youtu.be", "netflix", "amazon prime",
            "primevideo", "hotstar", "spotify", "twitch",

            # Social Media
            "facebook", "instagram", "snapchat", "tiktok",
            "pinterest", "reddit", "twitter", "x.com",
            "whatsapp", "telegram",

            # Search engines
            "google.com/search", "bing.com/search",
            "duckduckgo",

            # General browsing indicators
            "google.com", "bing.com", "yahoo.com",
        ]

        # 🟦 SERVER / DEVOPS
        self.server_keywords = [
            # SSH/Terminal clients
            "termius", "putty", "mobaxterm", "securecrt",
            "terminal", "iterm", "hyper", "kitty",
            "windows terminal", "powershell", "bash",

            # Cloud providers
            "aws", "ec2", "s3", "lambda", "iam", "cloudwatch",
            "azure", "microsoft azure",
            "gcp", "google cloud", "firebase",
            "digitalocean", "droplet",
            "linode", "vultr",
            "vercel", "netlify",
            "cloudflare", "godaddy", "namecheap",

            # DevOps tools
            "jenkins", "github actions", "gitlab ci", "circleci",
            "docker", "kubernetes", "k8s", "pods", "cluster",

            # Monitoring
            "grafana", "prometheus", "datadog", "new relic", "sentry",

            # Remote Access
            "ssh", "rdp", "vnc", "teamviewer", "anydesk", "openvpn",
        ]

        # 🟩 PRODUCTIVE (explicit signals; productive is also the default)
        self.productive_keywords = [
            # IDEs
            "visual studio", "vs code", "vscode", "cursor",
            "intellij", "pycharm", "phpstorm", "webstorm",

            # Coding files
            ".py", ".js", ".ts", ".php", ".jsx", ".tsx",
            ".html", ".css", ".json", ".sql",

            # Local development servers
            "localhost", "127.0.0.1",
            ":3000", ":8000", ":5000", ":4200",

            # Developer tools
            "postman", "insomnia",
            "github.com", "gitlab.com", "bitbucket",
            "stack overflow",

            # AI tools
            "chatgpt", "claude", "bard", "perplexity", "phind",

            # PM tools
            "jira", "notion", "trello", "asana",
            "confluence", "clickup",

            # Design
            "figma", "adobe xd", "photoshop", "illustrator",

            # Dev server tools
            "cpanel", "phpmyadmin", "filezilla",

            # CMS
            "wordpress", "wp-admin", "wp-login",
        ]

    @classmethod
    def _has_keyword(cls, text: str, keyword: str) -> bool:
        """Return True if *keyword* occurs in the (already lower-cased) *text*.

        Keywords longer than ``_SHORT_KEYWORD_MAX_LEN`` use plain substring
        search.  Shorter ones must not be glued to a neighbouring letter or
        digit on whichever of their edges is alphanumeric, so ``"hi"`` does
        not match ``"this"`` while ``".py"`` still matches ``"main.py"``.
        """
        if not keyword or len(keyword) > cls._SHORT_KEYWORD_MAX_LEN:
            return keyword in text
        lead = r"(?<![a-z0-9])" if keyword[0].isalnum() else ""
        trail = r"(?![a-z0-9])" if keyword[-1].isalnum() else ""
        return re.search(lead + re.escape(keyword) + trail, text) is not None

    def _matches_any(self, text: str, keywords: List[str]) -> bool:
        """Return True if any entry of *keywords* is found in *text*."""
        return any(self._has_keyword(text, keyword) for keyword in keywords)

    def categorize_activity(self, window_title: str, app_name: str = "") -> Tuple[str, float]:
        """Return ``(category, confidence)`` for one window/app pair.

        Args:
            window_title: Title of the focused window; ``None`` is treated
                as an empty title.
            app_name: Process / application name; ``None`` is treated as "".

        Returns:
            A tuple of the category name (``"productive"``, ``"browser"``,
            ``"server"`` or ``"non-work"``) and a confidence: 1.0 for the
            hard-coded special cases, 0.95 for keyword-table hits and 0.90
            for the productive default.
        """
        window_title = window_title or ""
        app_name = app_name or ""
        text = f"{window_title} {app_name}".lower()
        window_lower = window_title.lower().strip()

        # Empty or truly untitled windows -> non-work
        if window_lower in ("", "untitled", "blank"):
            return ("non-work", 1.0)

        # Browser new tabs and incognito/private tabs -> browser
        tab_markers = ("new tab", "incognito", "private browsing", "inprivate")
        if any(marker in window_lower for marker in tab_markers):
            return ("browser", 1.0)

        # Dedicated SSH clients are always SERVER (even with user@host titles)
        if any(client in text for client in ("termius", "putty", "mobaxterm", "securecrt")):
            return ("server", 1.0)

        # FileZilla (FTP / deployment) and WordPress admin are always PRODUCTIVE
        if any(tool in text for tool in ("filezilla", "wordpress", "wp-admin", "wp-login")):
            return ("productive", 1.0)

        # 1️⃣ NON-WORK (system lock / idle / screensaver / untitled)
        if self._matches_any(text, self.non_work_keywords):
            return ("non-work", 1.0)

        # 2️⃣ BROWSER: e-mail first ...
        email_indicators = ("inbox", "compose", "email", " mail ", "all mail")
        if any(indicator in text for indicator in email_indicators):
            return ("browser", 1.0)

        # ... a bare '@' looks like an e-mail address, EXCEPT in dev/server
        # tools where it is a user@host connection string.
        dev_like_with_at = ("filezilla", "ssh", "ftp", "sftp", "rdp",
                            "wordpress", "wp-admin", "wp-login", "termius")
        if "@" in text and not any(dev in text for dev in dev_like_with_at):
            return ("browser", 1.0)

        # ... then the specific browser keywords (social, video, search).
        # Generic page words are held back until after the specific
        # productive/server tables have had their chance.
        generic = set(self.generic_page_keywords)
        specific_browser = [kw for kw in self.browser_keywords if kw not in generic]
        if self._matches_any(text, specific_browser):
            return ("browser", 0.95)

        # 3️⃣ PRODUCTIVE (explicit IDE / source file / dev tool signal)
        if self._matches_any(text, self.productive_keywords):
            return ("productive", 0.95)

        # 4️⃣ SERVER
        if self._matches_any(text, self.server_keywords):
            return ("server", 0.95)

        # Company webmail whose title lacks the word "mail"
        if "first economy" in text:
            return ("browser", 0.95)

        # 5️⃣ Generic fallback: a very short title made of a generic word ...
        words = window_lower.split()
        if len(words) <= 2 and len(window_lower) < 20:
            single_words = {kw for kw in generic if " " not in kw}
            if any(word in single_words for word in words):
                return ("browser", 1.0)

        # ... or a generic word anywhere in the text.
        if self._matches_any(text, self.generic_page_keywords):
            return ("browser", 0.95)

        # 6️⃣ PRODUCTIVE (default for everything else)
        return ("productive", 0.90)

    def get_detailed_category(self, window_title: str, app_name: str = "") -> Dict:
        """Categorize one activity and attach a finer-grained subcategory.

        Returns:
            A dict with the keys ``category``, ``subcategory``,
            ``confidence``, ``window_title`` and ``app_name``; the last two
            echo the arguments unchanged.
        """
        category, confidence = self.categorize_activity(window_title, app_name)
        text = f"{window_title or ''} {app_name or ''}".lower()

        if category == "non-work":
            if "untitled" in text:
                sub = "untitled"
            elif "lock" in text:
                sub = "system-lock"
            elif "idle" in text or "afk" in text:
                sub = "idle"
            else:
                sub = "non-work"
        elif category == "browser":
            if "@" in text or "inbox" in text or "mail" in text:
                sub = "email"
            elif "youtube" in text:
                sub = "entertainment"
            elif "?q=" in text or "search" in text:
                sub = "search"
            elif "incognito" in text or "new tab" in text:
                sub = "browser-tabs"
            elif any(word in text for word in ("home", "welcome", "open", "start", "page")):
                sub = "generic-pages"
            else:
                sub = "general-browsing"
        elif category == "server":
            if "termius" in text:
                sub = "ssh-client"
            elif self._has_keyword(text, "aws"):
                sub = "aws"
            elif "azure" in text:
                sub = "azure"
            elif self._has_keyword(text, "gcp"):
                sub = "gcp"
            elif self._matches_any(text, ["ssh", "putty", "terminal"]):
                sub = "terminal-ssh"
            else:
                sub = "server-tools"
        else:  # PRODUCTIVE
            if "vscode" in text or "code.exe" in text:
                sub = "coding"
            elif "localhost" in text:
                sub = "dev-server"
            elif "postman" in text:
                sub = "api-testing"
            elif "figma" in text:
                sub = "design"
            elif "filezilla" in text:
                sub = "dev-server-tools"
            elif "wordpress" in text or "wp-admin" in text or "wp-login" in text:
                sub = "cms-wordpress"
            else:
                sub = "productive-general"

        return {
            "category": category,
            "subcategory": sub,
            "confidence": confidence,
            "window_title": window_title,
            "app_name": app_name,
        }

    def categorize_batch(self, activities: List[Dict]) -> List[Dict]:
        """Categorize every activity dict in *activities*.

        Each dict is read through its ``window_title`` and
        ``application_name`` keys (missing keys count as "") and is updated
        IN PLACE with the keys produced by :meth:`get_detailed_category`.

        Returns:
            A new list holding the same (now annotated) dict objects, in
            input order.
        """
        categorized = []
        for activity in activities:
            info = self.get_detailed_category(
                activity.get("window_title", ""),
                activity.get("application_name", ""),
            )
            activity.update(info)
            categorized.append(activity)

        return categorized


# Example usage and smoke tests.
# Each case is (window title, application name, expected category); the
# expectation is compared with the actual result and mismatches are reported
# instead of living only in a trailing comment.
if __name__ == "__main__":
    categorizer = ActivityCategorizer()

    test_cases = [
        # Generic pages -> browser
        ("Home", "chrome.exe", "browser"),
        ("Welcome", "chrome.exe", "browser"),
        ("Search", "chrome.exe", "browser"),
        ("Open", "chrome.exe", "browser"),
        ("Loading...", "chrome.exe", "browser"),
        ("Start Page", "chrome.exe", "browser"),

        # SSH/Terminal clients -> server
        ("Termius - SSH client", "termius.exe", "server"),
        ("user@server - Termius", "termius.exe", "server"),
        ("PuTTY Configuration", "putty.exe", "server"),
        ("MobaXterm", "mobaxterm.exe", "server"),

        # Regular test cases
        ("Untitled", "Unknown", "non-work"),
        ("New Tab", "chrome.exe", "browser"),
        ("New Incognito Tab", "chrome.exe", "browser"),
        ("Inbox (11,928) - ankita@firsteconomy.com - First Economy Mail", "chrome.exe", "browser"),
        ("YouTube - Google Chrome", "chrome.exe", "browser"),
        ("Gmail - Inbox - Google Chrome", "chrome.exe", "browser"),
        ("claude.ai - Claude", "chrome.exe", "productive"),
        # The only signal in this title is the generic page word "dashboard",
        # which the keyword tables assign to browser.
        ("Dashboard - WAAREE Admin", "chrome.exe", "browser"),
        ("Timesheet App", "chrome.exe", "productive"),
        ("ChatGPT", "chrome.exe", "productive"),
        ("Stack Overflow - Python question", "chrome.exe", "productive"),
        ("cPanel - Web Hosting Control Panel", "chrome.exe", "productive"),
        ("waaree_main - live@waaree.com@103.174.103.109 - FileZilla", "filezilla.exe", "productive"),
        ("FileZilla - FTP Client", "filezilla.exe", "productive"),
        ("My Site ‹ WAAREE — WordPress", "chrome.exe", "productive"),
        ("WAAREE – WordPress › Dashboard ‹ My Site — WordPress", "chrome.exe", "productive"),
        ("wp-admin / Edit Page ‹ WAAREE", "chrome.exe", "productive"),
        ("Google Search: python tutorial", "chrome.exe", "browser"),
        ("Facebook", "chrome.exe", "browser"),
        ("main.py - Visual Studio Code", "code.exe", "productive"),
        ("localhost:3000 - React App", "chrome.exe", "productive"),
        ("Untitled - Notepad", "notepad.exe", "non-work"),
        ("AWS Management Console", "chrome.exe", "server"),
        ("EC2 Dashboard", "chrome.exe", "server"),
    ]

    print("Testing categorization:")
    print("-" * 80)
    mismatches = 0
    for title, app, expected in test_cases:
        category_info = categorizer.get_detailed_category(title, app)
        print(f"Title: {title:<50} App: {app:<20}")
        print(f"→ Category: {category_info['category']:<12} Subcategory: {category_info['subcategory']:<20}")
        if category_info["category"] != expected:
            mismatches += 1
            print(f"  MISMATCH: expected {expected}")
        print("-" * 80)

    print(f"{len(test_cases) - mismatches}/{len(test_cases)} cases matched their expected category")