from spaces import GPU
import gradio as gr
import torch
import os
import time
from torchvision import models
from joblib import load
from extractor.visualise_vit_layer import VitGenerator
from relax_vqa import get_deep_feature, process_video_feature, process_patches, get_frame_patches, flow_to_rgb, merge_fragments, concatenate_features
from extractor.vf_extract import process_video_residual
from model_regression import Mlp, preprocess_data
from demo_test_gpu import evaluate_video_quality, load_model


@GPU
def run_relax_vqa(video_path, is_finetune, framerate, video_type):
    """Run the quality-prediction pipeline on one uploaded video.

    Builds a config dict from the UI inputs, loads the ResNet-50 and ViT
    feature extractors plus the regression model, and delegates scoring to
    ``evaluate_video_quality``.

    Args:
        video_path: Filesystem path of the uploaded video, or ``None`` when
            nothing was uploaded.
        is_finetune: Value of the "Use Finetuning?" checkbox; forwarded
            unchanged in the config.
        framerate: Value of the "Target Framerate (fps)" slider; forwarded
            unchanged in the config.
        video_type: Value of the "Video Dataset Type" dropdown; forwarded
            unchanged in the config.

    Returns:
        A human-readable string: the predicted score with the runtime on
        success, or a message prefixed with "❌" when the input is missing
        or any step of the pipeline raises.
    """
    # Gradio hands over None when the form is submitted without a file;
    # os.path.exists(None) would raise TypeError instead of returning False.
    if not video_path or not os.path.exists(video_path):
        return "❌ No video uploaded or the uploaded file has expired. Please upload again."

    cuda_available = torch.cuda.is_available()
    print("CUDA available:", cuda_available)
    # torch.cuda.current_device() raises when CUDA is not usable, so only
    # query it when a GPU is actually present (CPU fallback is supported below).
    if cuda_available:
        print("Current device:", torch.cuda.current_device())
    else:
        print("Current device: cpu")

    config = {
        'is_finetune': is_finetune,
        'framerate': framerate,
        'video_type': video_type,
        # NOTE(review): the next three entries are consumed by load_model /
        # evaluate_video_quality; their exact meaning is defined there.
        'save_path': 'model/',
        'train_data_name': 'lsvq_train',
        'select_criteria': 'byrmse',
        'video_path': video_path,
        'video_name': os.path.splitext(os.path.basename(video_path))[0],
    }
    print(config['video_name'])
    print(config['video_path'])
    device = torch.device("cuda" if cuda_available else "cpu")

    try:
        # Model loading lives inside the try so that a download/checkpoint
        # failure is reported like any other error and the uploaded file is
        # still cleaned up by the finally clause.
        resnet50 = models.resnet50(pretrained=True).to(device)
        vit = VitGenerator('vit_base', 16, device, evaluate=True, random=False, verbose=False)
        model_mlp = load_model(config, device)

        score, runtime = evaluate_video_quality(config, resnet50, vit, model_mlp, device)
        return f"Predicted Quality Score: {score:.4f} (in {runtime:.2f}s)"
    except Exception as e:
        # Top-level UI boundary: surface the failure as text instead of crashing.
        return f"❌ Error: {str(e)}"
    finally:
        # Only delete files whose path contains "gradio" (presumably Gradio's
        # temporary upload directory) so other local files are never removed.
        if "gradio" in video_path and os.path.exists(video_path):
            try:
                os.remove(video_path)
            except OSError as cleanup_error:
                # Best-effort cleanup: a failed delete must not replace the
                # result that is already being returned.
                print("Could not remove uploaded file:", cleanup_error)


# Text shown under the title; contains inline HTML (link, line breaks, bold).
_DESCRIPTION = "".join([
    "Upload a short video and get the predicted perceptual quality score using the ReLaX-VQA model. ",
    "You can try our test video from the ",
    "<a href='https://huggingface.co/spaces/xinyiW915/ReLaX-VQA/blob/main/ugc_original_videos/5636101558_540p.mp4' target='_blank'>demo video</a>. ",
    "<br><br>",
    "⚙️ This demo is currently running on <strong>Hugging Face ZeroGPU Space</strong>: Dynamic resources (NVIDIA A100).",
])

# Input widgets, in the positional order expected by run_relax_vqa:
# video_path, is_finetune, framerate, video_type.
_video_input = gr.Video(label="Upload a Video （e.g. mp4）")
_finetune_input = gr.Checkbox(label="Use Finetuning?", value=False)
_framerate_input = gr.Slider(
    label="Target Framerate (fps)",
    minimum=1,
    maximum=60,
    step=1,
    value=24,
)
_dataset_input = gr.Dropdown(
    label="Video Dataset Type",
    choices=["konvid_1k", "youtube_ugc", "live_vqc", "lsvq"],
    value="konvid_1k",
)

# Single text box that receives the string returned by run_relax_vqa.
_score_output = gr.Textbox(label="Predicted Quality Score")

demo = gr.Interface(
    fn=run_relax_vqa,
    inputs=[_video_input, _finetune_input, _framerate_input, _dataset_input],
    outputs=_score_output,
    title="🎬 ReLaX-VQA Online Demo",
    description=_DESCRIPTION,
)

demo.launch()