import gradio as gr
import whisper
import yt_dlp
from transformers import pipeline
import tempfile
import os
import json

# Cache models at module level so repeated requests reuse loaded weights.
MODEL = None
CLASSIFIER = None


def load_models():
    """Lazily load and cache the Whisper ASR model and the zero-shot classifier.

    Returns:
        tuple: (whisper model, transformers zero-shot-classification pipeline).
    """
    global MODEL, CLASSIFIER
    if MODEL is None:
        print("Loading models...")
        MODEL = whisper.load_model("base")
        CLASSIFIER = pipeline(
            "zero-shot-classification", model="facebook/bart-large-mnli"
        )
    return MODEL, CLASSIFIER


def convert_cookies_to_single_line():
    """Utility function to convert cookies.txt to single-line format.

    Prints the escaped content so it can be pasted into a Hugging Face
    Space secret (YOUTUBE_COOKIES_TXT). Returns the single-line string,
    or None if cookies.txt does not exist.
    """
    try:
        with open("cookies.txt") as f:
            single_line = f.read().replace("\n", "\\n")
        print("Copy this to Hugging Face Secrets (YOUTUBE_COOKIES_TXT):")
        print(single_line)
        return single_line
    except FileNotFoundError:
        print("Error: cookies.txt file not found")
        return None


def setup_cookies():
    """Materialize cookies.txt from the YOUTUBE_COOKIES_TXT env var.

    Returns:
        bool: True if a cookies.txt file was written from the env var,
        False if the env var is unset (no file is created in that case).
    """
    cookies_txt = os.getenv('YOUTUBE_COOKIES_TXT')
    if not cookies_txt:
        return False
    with open('cookies.txt', 'w') as f:
        # The secret stores newlines escaped as "\n"; restore them here.
        f.write(cookies_txt.replace("\\n", "\n"))
    return True


def normalize_youtube_url(url):
    """Convert various YouTube URL formats to a standard watch URL.

    Returns the normalized URL, or None when the input is not a
    youtube.com/watch or youtu.be link.
    """
    url = url.strip()
    # Handle youtu.be short links: the video id is the last path segment.
    if 'youtu.be' in url.lower():
        video_id = url.split('/')[-1].split('?')[0]
        return f'https://www.youtube.com/watch?v={video_id}'
    # Ensure URL is in standard format.
    if 'youtube.com/watch' not in url.lower():
        return None
    return url.split('&')[0]  # Drop extra query parameters after v=


def analyze_video(yt_url):
    """Download a YouTube video's audio, transcribe it, and classify the topic.

    Args:
        yt_url: A youtube.com/watch or youtu.be URL.

    Returns:
        tuple: (transcription_or_error_message, top_label, confidence_score).
        On failure the first element starts with "Error:" and the other two
        are "" and 0, matching the Gradio output widgets.
    """
    try:
        # Normalize and validate URL first so we fail fast on bad input.
        normalized_url = normalize_youtube_url(yt_url)
        if not normalized_url:
            return "Error: Invalid YouTube URL. Must be from youtube.com or youtu.be", "", 0

        model, classifier = load_models()
        has_cookies = setup_cookies()

        # Use a temp directory and let yt-dlp pick the download extension.
        # BUG FIX: the old code pointed outtmpl at a path already ending in
        # ".mp3" while the FFmpegExtractAudio postprocessor appends ".mp3"
        # itself, so the converted file ended up at "<tmp>.mp3.mp3" and
        # Whisper was handed the wrong (empty) path, leaking the download.
        with tempfile.TemporaryDirectory() as tmp_dir:
            outtmpl = os.path.join(tmp_dir, "audio.%(ext)s")
            audio_path = os.path.join(tmp_dir, "audio.mp3")
            try:
                ydl_opts = {
                    'format': 'bestaudio/best',
                    'outtmpl': outtmpl,
                    'quiet': True,
                    'postprocessors': [{
                        'key': 'FFmpegExtractAudio',
                        'preferredcodec': 'mp3',
                        'preferredquality': '192',
                    }],
                    'http_headers': {
                        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
                        'Accept-Language': 'en-US,en;q=0.9',
                        'Referer': 'https://www.youtube.com/'
                    },
                    'socket_timeout': 30,
                    'noplaylist': True,
                    'verbose': False
                }

                if has_cookies:
                    # BUG FIX: do not set 'extract_flat' here — flat
                    # extraction returns playlist stubs and is wrong for
                    # downloading a single video.
                    ydl_opts['cookiefile'] = 'cookies.txt'

                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    try:
                        info = ydl.extract_info(normalized_url, download=False)
                        if not info.get('url') and not info.get('requested_downloads'):
                            return "Error: Failed to extract video info. Cookies may be invalid.", "", 0
                        ydl.download([normalized_url])
                    except yt_dlp.utils.DownloadError as e:
                        if "Sign in to confirm you're not a bot" in str(e):
                            return "Error: YouTube requires authentication. Please ensure cookies are fresh and valid.", "", 0
                        raise

                if not os.path.exists(audio_path):
                    return "Error: Audio download failed.", "", 0

                result = model.transcribe(audio_path)
                transcription = result["text"]

                labels = ["educational", "entertainment", "news",
                          "political", "religious", "technical"]
                # BART-MNLI has a bounded input length; feed the classifier a
                # prefix of long transcripts instead of letting it crash.
                classification = classifier(
                    transcription[:4000],
                    candidate_labels=labels,
                    hypothesis_template="This content is about {}."
                )

                return (
                    transcription,
                    classification["labels"][0],
                    round(classification["scores"][0], 3),
                )
            finally:
                # BUG FIX: only remove cookies.txt when WE wrote it from the
                # env var; never delete a user-provided cookies file.
                if has_cookies and os.path.exists('cookies.txt'):
                    os.remove('cookies.txt')

    except Exception as e:
        # Top-level boundary: surface the failure in the UI instead of a 500.
        return f"Error: {str(e)}", "", 0


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎬 YouTube Content Analyzer")
    with gr.Row():
        url = gr.Textbox(
            label="YouTube URL",
            placeholder="https://www.youtube.com/watch?v=... or https://youtu.be/...",
        )
        btn = gr.Button("Analyze", variant="primary")
    with gr.Row():
        transcription = gr.Textbox(label="Transcription", interactive=False, lines=5)
        with gr.Column():
            label = gr.Label(label="Category")
            # precision=3 matches the round(..., 3) returned by analyze_video.
            confidence = gr.Number(label="Confidence Score", precision=3)
    btn.click(analyze_video, inputs=url, outputs=[transcription, label, confidence])

if __name__ == "__main__":
    if os.path.exists("cookies.txt"):
        convert_cookies_to_single_line()
    demo.launch()