smileyc committed on
Commit
1b737d3
Β·
1 Parent(s): 6558ee8

Switch to Modal backend with security

Browse files
Files changed (1) hide show
  1. app.py +208 -228
app.py CHANGED
@@ -1,7 +1,6 @@
1
  """
2
- MCP Video Agent - Hugging Face Space Deployment
3
- Combines Gradio frontend with direct Gemini API integration
4
- Optimized for HF Space deployment with implicit caching
5
  """
6
 
7
  import os
@@ -9,172 +8,62 @@ import gradio as gr
9
  import time
10
  import hashlib
11
  import base64
 
 
12
 
13
  # ==========================================
14
- # Flexible API Key Loading
15
  # ==========================================
16
- def get_api_key(key_name):
17
- """Get API key from environment variables (HF Space Secrets)."""
18
- key = os.environ.get(key_name)
19
- if key:
20
- print(f"βœ… Using {key_name} from environment")
21
- return key
22
- print(f"⚠️ {key_name} not found")
23
- return None
24
-
25
- # ==========================================
26
- # Video Analysis with Implicit Caching
27
- # ==========================================
28
-
29
- # Cache for uploaded Gemini files
30
- gemini_files_cache = {}
31
-
32
- def analyze_video_with_gemini(query: str, video_path: str):
33
- """
34
- Analyze video using Gemini 2.5 Flash with implicit caching.
35
-
36
- Args:
37
- query: User's question
38
- video_path: Local path to video file
39
-
40
- Returns:
41
- str: Analysis result
42
- """
43
- from google import genai
44
- import hashlib
45
-
46
- # Get API key
47
- api_key = get_api_key("GOOGLE_API_KEY")
48
- if not api_key:
49
- return "❌ Error: GOOGLE_API_KEY not set. Please configure it in Space Settings β†’ Secrets."
50
-
51
- client = genai.Client(api_key=api_key)
52
 
53
- # Generate cache key for this video
54
- with open(video_path, 'rb') as f:
55
- video_hash = hashlib.md5(f.read()).hexdigest()
56
-
57
- cache_key = f"{video_path}_{video_hash}"
58
-
59
- try:
60
- # Check if we already uploaded this file
61
- if cache_key in gemini_files_cache:
62
- file_name = gemini_files_cache[cache_key]
63
- print(f"♻️ Using cached file: {file_name}")
64
-
65
- try:
66
- video_file = client.files.get(name=file_name)
67
- if video_file.state.name == 'ACTIVE':
68
- print(f"βœ… Cached file is active")
69
- else:
70
- print(f"⚠️ Cached file state: {video_file.state.name}, re-uploading...")
71
- video_file = None
72
- except Exception as e:
73
- print(f"⚠️ Cached file retrieval failed: {e}")
74
- video_file = None
75
- else:
76
- video_file = None
77
-
78
- # Upload if needed
79
- if video_file is None:
80
- print(f"πŸ“€ Uploading video to Gemini...")
81
- video_file = client.files.upload(file=video_path)
82
-
83
- # Wait for processing
84
- while video_file.state.name == 'PROCESSING':
85
- print('.', end='', flush=True)
86
- time.sleep(2)
87
- video_file = client.files.get(name=video_file.name)
88
-
89
- if video_file.state.name == 'FAILED':
90
- return "❌ Video processing failed"
91
-
92
- print(f"\nβœ… Video uploaded: {video_file.uri}")
93
-
94
- # Cache the file reference
95
- gemini_files_cache[cache_key] = video_file.name
96
-
97
- # Generate content (implicit caching happens automatically)
98
- print(f"🧠 Analyzing with Gemini 2.5 Flash...")
99
 
100
- response = client.models.generate_content(
101
- model="gemini-2.5-flash",
102
- contents=[
103
- video_file,
104
- f"{query}\n\nPlease provide a detailed but focused response within 300-400 words. Do NOT mention specific timestamps unless the user asks about timing."
105
- ]
106
- )
107
 
108
- # Print usage metadata
109
- if hasattr(response, 'usage_metadata'):
110
- print(f"πŸ“Š Usage: {response.usage_metadata}")
111
 
112
- if response.text:
113
- return response.text
114
- else:
115
- return "⚠️ No response generated. The content may have been blocked."
116
-
117
- except Exception as e:
118
- print(f"❌ Analysis error: {e}")
119
- return f"❌ Error: {str(e)}"
 
 
120
 
 
 
 
121
 
122
- def generate_speech(text: str):
123
- """
124
- Generate speech from text using ElevenLabs.
125
-
126
- Args:
127
- text: Text to convert to speech
128
-
129
- Returns:
130
- str: Path to generated audio file or None
131
- """
132
- from elevenlabs.client import ElevenLabs
133
-
134
- # Get API key
135
- api_key = get_api_key("ELEVENLABS_API_KEY")
136
- if not api_key:
137
- print("⚠️ ELEVENLABS_API_KEY not set, skipping TTS")
138
- return None
139
-
140
  try:
141
- # Limit text length
142
- max_chars = 2500
143
- safe_text = text[:max_chars] if len(text) > max_chars else text
144
-
145
- if len(text) > max_chars:
146
- safe_text = safe_text.rstrip() + "..."
147
- print(f"⚠️ Text truncated from {len(text)} to {max_chars} chars")
148
-
149
- print(f"πŸ—£οΈ Generating speech ({len(safe_text)} chars)...")
150
- start_time = time.time()
151
-
152
- client = ElevenLabs(api_key=api_key)
153
-
154
- audio_generator = client.text_to_speech.convert(
155
- voice_id="21m00Tcm4TlvDq8ikWAM",
156
- output_format="mp3_44100_128",
157
- text=safe_text,
158
- model_id="eleven_multilingual_v2"
159
- )
160
-
161
- # Generate unique filename
162
- timestamp = int(time.time())
163
- output_path = f"response_{timestamp}.mp3"
164
-
165
- with open(output_path, "wb") as f:
166
- for chunk in audio_generator:
167
- f.write(chunk)
168
-
169
- elapsed = time.time() - start_time
170
- print(f"βœ… Speech generated in {elapsed:.2f}s")
171
- return output_path
172
-
173
  except Exception as e:
174
- print(f"❌ TTS error: {e}")
175
  return None
176
 
177
-
178
  # ==========================================
179
  # Gradio Interface Logic
180
  # ==========================================
@@ -182,15 +71,25 @@ def generate_speech(text: str):
182
  # Cache for uploaded videos
183
  uploaded_videos_cache = {}
184
 
185
- def process_interaction(user_message, history, video_file):
186
  """
187
- Core chatbot logic for HF Space.
188
  """
189
  if history is None:
190
  history = []
191
 
192
- # Track latest audio
193
- latest_audio = None
 
 
 
 
 
 
 
 
 
 
194
 
195
  # 1. Check video upload
196
  if video_file is None:
@@ -205,30 +104,61 @@ def process_interaction(user_message, history, video_file):
205
  yield history + [{"role": "assistant", "content": f"❌ Video too large! Size: {file_size_mb:.1f}MB. Please upload a video smaller than 100MB."}]
206
  return
207
 
208
- # Check cache
209
  with open(local_path, 'rb') as f:
210
  file_hash = hashlib.md5(f.read()).hexdigest()[:8]
211
 
 
 
212
  cache_key = f"{local_path}_{file_hash}"
213
 
214
- if cache_key in uploaded_videos_cache:
215
- print(f"♻️ Video already processed")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  else:
217
- print(f"πŸ“Ή New video: {local_path} ({file_size_mb:.1f}MB)")
218
- uploaded_videos_cache[cache_key] = True
 
 
219
 
220
- # 2. Show thinking message
221
- history.append({"role": "user", "content": user_message})
222
- history.append({"role": "assistant", "content": "πŸ€” Gemini is analyzing the video..."})
223
  yield history
224
 
225
- # 3. Analyze video
226
  try:
227
- text_response = analyze_video_with_gemini(user_message, local_path)
 
 
 
 
 
 
228
  except Exception as e:
229
  text_response = f"❌ Analysis error: {str(e)}"
230
 
231
- # Store full text
232
  full_text_response = text_response
233
 
234
  # 4. Generate audio if successful
@@ -237,30 +167,38 @@ def process_interaction(user_message, history, video_file):
237
  yield history
238
 
239
  try:
240
- # Generate audio
241
- audio_path = generate_speech(text_response)
 
 
 
 
 
 
242
 
243
- # Wait for file to be ready
244
- if audio_path and os.path.exists(audio_path):
245
- time.sleep(0.5)
 
 
 
 
 
 
 
 
 
246
 
247
- # Check file has content
248
- if os.path.getsize(audio_path) > 0:
249
- # Retry logic
250
- max_retries = 2
251
- for retry in range(max_retries):
252
- if os.path.getsize(audio_path) > 1000: # At least 1KB
253
- break
254
- print(f"⏳ Retry {retry + 1}: File too small, waiting...")
255
- time.sleep(2)
256
-
257
- # Read audio and create response
258
- with open(audio_path, 'rb') as f:
259
- audio_bytes = f.read()
260
- audio_base64 = base64.b64encode(audio_bytes).decode()
261
-
262
- # Create response with embedded audio
263
- response_content = f"""πŸŽ™οΈ **Audio Response**
264
 
265
  <audio controls autoplay style="width: 100%; margin: 10px 0; background: #f0f0f0; border-radius: 5px;">
266
  <source src="data:audio/mpeg;base64,{audio_base64}" type="audio/mpeg">
@@ -271,57 +209,81 @@ def process_interaction(user_message, history, video_file):
271
  <div style="background-color: #000000; color: #00ff00; padding: 25px; border-radius: 10px; font-family: 'Courier New', monospace; line-height: 1.8; font-size: 14px; white-space: normal; word-wrap: break-word; overflow-wrap: break-word; max-width: 100%;">
272
  {full_text_response}
273
  </div>"""
274
-
275
- history[-1] = {"role": "assistant", "content": response_content}
276
- yield history
277
- else:
278
- # Audio file is empty
279
- history[-1] = {"role": "assistant", "content": f"⚠️ Audio generation produced empty file.\n\n<div style='background: black; color: lime; padding: 20px; border-radius: 10px; white-space: normal; word-wrap: break-word;'>{full_text_response}</div>"}
280
- yield history
281
  else:
282
- # No audio generated
283
- history[-1] = {"role": "assistant", "content": f"⚠️ Audio generation skipped (API key not set).\n\n<div style='background: black; color: lime; padding: 20px; border-radius: 10px; white-space: normal; word-wrap: break-word;'>{full_text_response}</div>"}
284
  yield history
285
 
286
  except Exception as e:
287
- # Audio error
288
  history[-1] = {"role": "assistant", "content": f"❌ Audio error: {str(e)}\n\n<div style='background: black; color: lime; padding: 20px; border-radius: 10px; white-space: normal; word-wrap: break-word;'>{full_text_response}</div>"}
289
  yield history
290
  else:
291
- # Error in analysis
292
  history[-1] = {"role": "assistant", "content": text_response}
293
  yield history
294
 
295
 
296
  # ==========================================
297
- # Gradio Interface
298
  # ==========================================
299
 
300
- with gr.Blocks(title="MCP Video Agent") as demo:
 
 
 
 
 
 
 
 
 
 
 
 
301
  gr.Markdown("# πŸŽ₯ MCP Video Agent")
302
- gr.Markdown("**Powered by Gemini 2.5 Flash + ElevenLabs TTS**")
 
 
 
 
 
 
 
 
 
303
 
304
- gr.Markdown("""
305
  ### πŸ“– How to Use
306
- 1. Upload a video (MP4, max 100MB)
307
- 2. Ask questions about the video
308
- 3. Get AI-powered voice and text responses!
309
-
310
- ### πŸ”Œ Use as MCP Server in Claude Desktop
311
- Add this URL to your Claude Desktop config:
312
- ```
313
- https://YOUR_USERNAME-mcp-video-agent.hf.space/sse
314
- ```
315
-
316
- **Note:** This Space uses the owner's API keys. For heavy usage, please:
317
- 1. Click "Duplicate this Space"
318
- 2. Add your own `GOOGLE_API_KEY` and `ELEVENLABS_API_KEY` in Settings β†’ Secrets
319
-
320
- ### βš™οΈ Required Secrets (in Space Settings)
321
- - `GOOGLE_API_KEY` - Get from [Google AI Studio](https://aistudio.google.com/apikey)
322
- - `ELEVENLABS_API_KEY` - Get from [ElevenLabs](https://elevenlabs.io) (optional, for TTS)
 
 
 
 
 
 
 
 
 
323
  """)
324
 
 
 
325
  with gr.Row():
326
  with gr.Column(scale=1):
327
  video_input = gr.Video(label="πŸ“Ή Upload Video (MP4)", sources=["upload"])
@@ -346,25 +308,43 @@ with gr.Blocks(title="MCP Video Agent") as demo:
346
  inputs=msg
347
  )
348
 
 
 
 
 
 
 
349
  # Event handlers
350
  submit_btn.click(
351
  process_interaction,
352
- inputs=[msg, chatbot, video_input],
353
  outputs=[chatbot]
354
  )
355
 
356
  msg.submit(
357
  process_interaction,
358
- inputs=[msg, chatbot, video_input],
359
  outputs=[chatbot]
360
  )
361
 
362
  # ==========================================
363
- # Launch
364
  # ==========================================
365
 
366
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
367
  demo.launch(
 
368
  show_error=True,
369
  share=False
370
  )
 
 
1
  """
2
+ MCP Video Agent - HF Space with Modal Backend + Security
3
+ Connects to Modal backend with authentication and rate limiting
 
4
  """
5
 
6
  import os
 
8
  import time
9
  import hashlib
10
  import base64
11
+ from datetime import datetime, timedelta
12
+ from collections import defaultdict
13
 
14
  # ==========================================
15
+ # Security: Rate Limiting
16
  # ==========================================
17
class RateLimiter:
    """Simple in-memory, per-user sliding-window rate limiter.

    Tracks request timestamps per user id and allows at most
    ``max_requests_per_hour`` requests within any rolling one-hour window.
    State lives only in process memory, so limits reset on restart and are
    not shared across replicas.
    """

    def __init__(self, max_requests_per_hour=10):
        self.max_requests = max_requests_per_hour
        # user_id -> list of datetimes of requests made in (roughly) the last hour
        self.requests = defaultdict(list)

    def _prune(self, user_id):
        """Drop timestamps older than one hour and return the kept list.

        Centralizes the window logic previously duplicated in
        ``is_allowed`` and ``get_remaining``; also keeps memory bounded by
        pruning on every read, not just on ``is_allowed``.
        """
        cutoff = datetime.now() - timedelta(hours=1)
        recent = [req_time for req_time in self.requests[user_id] if req_time > cutoff]
        self.requests[user_id] = recent
        return recent

    def is_allowed(self, user_id):
        """Check if user is within the rate limit; records the request if so.

        Returns:
            bool: True when the request was admitted (and counted),
            False when the user already hit the hourly limit.
        """
        recent = self._prune(user_id)

        if len(recent) >= self.max_requests:
            return False

        # Record the admitted request.
        recent.append(datetime.now())
        return True

    def get_remaining(self, user_id):
        """Get the number of requests the user may still make this hour."""
        recent = self._prune(user_id)
        return max(0, self.max_requests - len(recent))
48
 
49
+ # Initialize rate limiter (configurable via environment)
50
+ MAX_REQUESTS_PER_HOUR = int(os.environ.get("MAX_REQUESTS_PER_HOUR", "10"))
51
+ rate_limiter = RateLimiter(max_requests_per_hour=MAX_REQUESTS_PER_HOUR)
52
 
53
+ # ==========================================
54
+ # Modal Connection
55
+ # ==========================================
56
+ import modal
57
+
58
def get_modal_function(function_name):
    """Look up a function deployed under the Modal app "mcp-video-agent".

    Returns the remote function handle, or None when the lookup fails
    (e.g. the app is not deployed or Modal credentials are missing).
    """
    try:
        return modal.Function.from_name("mcp-video-agent", function_name)
    except Exception as e:
        print(f"❌ Failed to connect to Modal: {e}")
        return None
66
 
 
67
  # ==========================================
68
  # Gradio Interface Logic
69
  # ==========================================
 
71
  # Cache for uploaded videos
72
  uploaded_videos_cache = {}
73
 
74
+ def process_interaction(user_message, history, video_file, username, request: gr.Request):
75
  """
76
+ Core chatbot logic with Modal backend and security.
77
  """
78
  if history is None:
79
  history = []
80
 
81
+ # Get user identifier for rate limiting
82
+ user_id = username # Use authenticated username
83
+
84
+ # Check rate limit
85
+ if not rate_limiter.is_allowed(user_id):
86
+ remaining = rate_limiter.get_remaining(user_id)
87
+ yield history + [{"role": "assistant", "content": f"⚠️ Rate limit exceeded. You have {remaining} requests remaining this hour. Please try again later."}]
88
+ return
89
+
90
+ # Show remaining requests
91
+ remaining = rate_limiter.get_remaining(user_id)
92
+ print(f"πŸ’‘ User {user_id}: {remaining} requests remaining this hour")
93
 
94
  # 1. Check video upload
95
  if video_file is None:
 
104
  yield history + [{"role": "assistant", "content": f"❌ Video too large! Size: {file_size_mb:.1f}MB. Please upload a video smaller than 100MB."}]
105
  return
106
 
107
+ # Generate unique filename
108
  with open(local_path, 'rb') as f:
109
  file_hash = hashlib.md5(f.read()).hexdigest()[:8]
110
 
111
+ timestamp = int(time.time())
112
+ unique_filename = f"video_{timestamp}_{file_hash}.mp4"
113
  cache_key = f"{local_path}_{file_hash}"
114
 
115
+ # 2. Upload to Modal Volume if needed
116
+ if cache_key not in uploaded_videos_cache:
117
+ history.append({"role": "user", "content": user_message})
118
+ history.append({"role": "assistant", "content": f"πŸ“€ Uploading video ({file_size_mb:.1f}MB)..."})
119
+ yield history
120
+
121
+ try:
122
+ import subprocess
123
+ result = subprocess.run(
124
+ ["modal", "volume", "put", "video-storage", local_path, f"/{unique_filename}", "--force"],
125
+ capture_output=True,
126
+ text=True,
127
+ timeout=300
128
+ )
129
+
130
+ if result.returncode != 0:
131
+ history[-1] = {"role": "assistant", "content": f"❌ Upload failed: {result.stderr}"}
132
+ yield history
133
+ return
134
+
135
+ uploaded_videos_cache[cache_key] = unique_filename
136
+ print(f"βœ… Video uploaded: {unique_filename}")
137
+ except Exception as e:
138
+ history[-1] = {"role": "assistant", "content": f"❌ Upload error: {str(e)}"}
139
+ yield history
140
+ return
141
  else:
142
+ unique_filename = uploaded_videos_cache[cache_key]
143
+ history.append({"role": "user", "content": user_message})
144
+ history.append({"role": "assistant", "content": "♻️ Using cached video..."})
145
+ yield history
146
 
147
+ # 3. Analyze video via Modal
148
+ history[-1] = {"role": "assistant", "content": "πŸ€” Analyzing video with Gemini..."}
 
149
  yield history
150
 
 
151
  try:
152
+ analyze_fn = get_modal_function("_internal_analyze_video")
153
+ if analyze_fn is None:
154
+ history[-1] = {"role": "assistant", "content": "❌ Failed to connect to Modal backend. Please check deployment."}
155
+ yield history
156
+ return
157
+
158
+ text_response = analyze_fn.remote(user_message, video_filename=unique_filename)
159
  except Exception as e:
160
  text_response = f"❌ Analysis error: {str(e)}"
161
 
 
162
  full_text_response = text_response
163
 
164
  # 4. Generate audio if successful
 
167
  yield history
168
 
169
  try:
170
+ speak_fn = get_modal_function("_internal_speak_text")
171
+ if speak_fn is None:
172
+ history[-1] = {"role": "assistant", "content": f"⚠️ TTS unavailable.\n\n<div style='background: black; color: lime; padding: 20px; border-radius: 10px; white-space: normal; word-wrap: break-word;'>{full_text_response}</div>"}
173
+ yield history
174
+ return
175
+
176
+ audio_filename = f"audio_{unique_filename.replace('.mp4', '.mp3')}"
177
+ speak_fn.remote(text_response, audio_filename=audio_filename)
178
 
179
+ # Download audio
180
+ time.sleep(2)
181
+ import subprocess
182
+ local_audio = f"/tmp/{audio_filename}"
183
+
184
+ max_retries = 3
185
+ for retry in range(max_retries):
186
+ result = subprocess.run(
187
+ ["modal", "volume", "get", "video-storage", f"/{audio_filename}", local_audio],
188
+ capture_output=True,
189
+ text=True
190
+ )
191
 
192
+ if result.returncode == 0 and os.path.exists(local_audio) and os.path.getsize(local_audio) > 1000:
193
+ break
194
+ time.sleep(2)
195
+
196
+ if os.path.exists(local_audio) and os.path.getsize(local_audio) > 1000:
197
+ with open(local_audio, 'rb') as f:
198
+ audio_bytes = f.read()
199
+ audio_base64 = base64.b64encode(audio_bytes).decode()
200
+
201
+ response_content = f"""πŸŽ™οΈ **Audio Response** ({remaining} requests remaining this hour)
 
 
 
 
 
 
 
202
 
203
  <audio controls autoplay style="width: 100%; margin: 10px 0; background: #f0f0f0; border-radius: 5px;">
204
  <source src="data:audio/mpeg;base64,{audio_base64}" type="audio/mpeg">
 
209
  <div style="background-color: #000000; color: #00ff00; padding: 25px; border-radius: 10px; font-family: 'Courier New', monospace; line-height: 1.8; font-size: 14px; white-space: normal; word-wrap: break-word; overflow-wrap: break-word; max-width: 100%;">
210
  {full_text_response}
211
  </div>"""
212
+
213
+ history[-1] = {"role": "assistant", "content": response_content}
214
+ yield history
 
 
 
 
215
  else:
216
+ history[-1] = {"role": "assistant", "content": f"⚠️ Audio generation incomplete.\n\n<div style='background: black; color: lime; padding: 20px; border-radius: 10px; white-space: normal; word-wrap: break-word;'>{full_text_response}</div>"}
 
217
  yield history
218
 
219
  except Exception as e:
 
220
  history[-1] = {"role": "assistant", "content": f"❌ Audio error: {str(e)}\n\n<div style='background: black; color: lime; padding: 20px; border-radius: 10px; white-space: normal; word-wrap: break-word;'>{full_text_response}</div>"}
221
  yield history
222
  else:
 
223
  history[-1] = {"role": "assistant", "content": text_response}
224
  yield history
225
 
226
 
227
  # ==========================================
228
+ # Gradio Interface with Authentication
229
  # ==========================================
230
 
231
+ # Get credentials from environment
232
+ GRADIO_USERNAME = os.environ.get("GRADIO_USERNAME", "admin")
233
+ GRADIO_PASSWORD = os.environ.get("GRADIO_PASSWORD")
234
+
235
+ # Authentication function (optional for Hackathon/Demo)
236
def authenticate(username, password):
    """Authenticate users - only if a password is configured.

    When GRADIO_PASSWORD is unset the Space is treated as public and every
    login attempt succeeds (convenient for hackathon/demo deployments).
    Otherwise both username and password must match the configured values.

    Uses constant-time comparison (hmac.compare_digest) so the check does
    not leak credential length/prefix information via timing differences.
    """
    if GRADIO_PASSWORD is None:
        # No password set, allow anyone (good for Hackathon/Demo)
        return True
    import hmac  # local import: only needed when auth is actually enabled
    user_ok = hmac.compare_digest(str(username), GRADIO_USERNAME)
    pass_ok = hmac.compare_digest(str(password), GRADIO_PASSWORD)
    return user_ok and pass_ok
242
+
243
+ with gr.Blocks(title="πŸŽ₯ MCP Video Agent") as demo:
244
  gr.Markdown("# πŸŽ₯ MCP Video Agent")
245
+ gr.Markdown("**πŸ† MCP 1st Birthday Hackathon** | Track: MCP in Action (Consumer & Creative)")
246
+
247
+ gr.Markdown(f"""
248
+ ### ⚑ Key Innovation: Smart Frame Caching
249
+
250
+ **First Query**: Video is analyzed deeply and cached (~8-12 seconds)
251
+ **Follow-up Queries**: Instant responses using cached context (~2-3 seconds, 90% cost reduction!)
252
+ **Cache Duration**: 1 hour - ask multiple questions without reprocessing
253
+
254
+ ---
255
 
 
256
  ### πŸ“– How to Use
257
+
258
+ 1. **Upload** a video (MP4, max 100MB)
259
+ 2. **Ask** your first question - video will be analyzed and cached
260
+ 3. **Continue** asking follow-up questions - experience the speed boost!
261
+ 4. **Listen** to voice responses (powered by ElevenLabs TTS)
262
+
263
+ **Pro Tip**: After your first question, try asking 2-3 more to see how fast cached responses are!
264
+
265
+ ---
266
+
267
+ ### πŸ›‘οΈ Fair Usage Policy
268
+
269
+ - **Rate Limit**: {MAX_REQUESTS_PER_HOUR} requests per hour per user
270
+ - **Video Size**: Max 100MB
271
+ - **Shared Resources**: This is a Hackathon demo - please use responsibly
272
+
273
+ ---
274
+
275
+ ### πŸ”§ Tech Stack
276
+
277
+ - **Gemini 2.5 Flash**: Multimodal video analysis + Context Caching
278
+ - **Modal**: Serverless backend + Persistent storage
279
+ - **ElevenLabs**: Neural text-to-speech
280
+ - **Gradio 6.0**: Interactive UI
281
+
282
+ **Sponsor Tech Used**: βœ… Modal | βœ… Google Gemini | βœ… ElevenLabs
283
  """)
284
 
285
+ username_state = gr.State("")
286
+
287
  with gr.Row():
288
  with gr.Column(scale=1):
289
  video_input = gr.Video(label="πŸ“Ή Upload Video (MP4)", sources=["upload"])
 
308
  inputs=msg
309
  )
310
 
311
+ # Get username from Gradio request
312
+ def set_username(request: gr.Request):
313
+ return request.username if hasattr(request, 'username') else "anonymous"
314
+
315
+ demo.load(set_username, None, username_state)
316
+
317
  # Event handlers
318
  submit_btn.click(
319
  process_interaction,
320
+ inputs=[msg, chatbot, video_input, username_state],
321
  outputs=[chatbot]
322
  )
323
 
324
  msg.submit(
325
  process_interaction,
326
+ inputs=[msg, chatbot, video_input, username_state],
327
  outputs=[chatbot]
328
  )
329
 
330
  # ==========================================
331
+ # Launch with Authentication
332
  # ==========================================
333
 
334
if __name__ == "__main__":
    # Authentication is opt-in: enabled only when a password was configured
    # via the environment (usually unnecessary for a hackathon demo).
    auth_config = authenticate if GRADIO_PASSWORD else None
    if auth_config is not None:
        print(f"πŸ”’ Authentication enabled. Username: {GRADIO_USERNAME}")
    else:
        print("🌐 Public access enabled (no authentication required)")
        print(" Rate limiting active to prevent abuse")
        print(f" Limit: {MAX_REQUESTS_PER_HOUR} requests/hour per user")

    demo.launch(
        auth=auth_config,
        show_error=True,
        share=False
    )
  )
350
+