TroglodyteDerivations committed on
Commit c7c69fa · verified · 1 Parent(s): 2b13fbf

Upload 12 files

Gradio App Ver 1/app_es.py ADDED
@@ -0,0 +1,435 @@
+ import gradio as gr
+ import math
+ import numpy as np
+ import plotly.graph_objects as go
+ from smolLM3 import load_model_with_config, GenerationConfig, generate_text
+ import re
+ import torch
+ import torchaudio as ta
+ from chatterbox.tts import ChatterboxTTS
+ import logging
+ import os
+ from datetime import datetime
+ from lipsync import LipSync
+ from pathlib import Path
+ import pickle
+ import warnings
+
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+     handlers=[
+         logging.FileHandler('robotic_arm.log'),
+         logging.StreamHandler()
+     ]
+ )
+ logger = logging.getLogger(__name__)
+
+
+ warnings.filterwarnings("ignore", category=FutureWarning)
+
+ # Monkey-patch torch.load so the Wav2Lip checkpoint loads on this machine.
+ # Note: `map_location` is a module-level name defined below; it is only
+ # resolved when the patched function is called, which happens after the
+ # device setup.
+ torch_load_original = torch.load
+ def patched_torch_load(*args, **kwargs):
+     if 'map_location' not in kwargs:
+         kwargs['map_location'] = map_location
+
+     # Handle both string and Path objects
+     path_str = str(args[0]) if len(args) > 0 else str(kwargs.get('f', ''))
+     if 'wav2lip_gan.pth' in path_str:
+         kwargs.update({
+             'weights_only': False,
+             'pickle_module': pickle,
+             'encoding': 'latin1'
+         })
+     return torch_load_original(*args, **kwargs)
+ torch.load = patched_torch_load
+
+ # Initialize TTS (device setup similar to example_for_mac.py)
+ device = "mps" if torch.backends.mps.is_available() else "cpu"
+ map_location = torch.device(device)
+
+ # Initialize TTS model (load once when the app starts)
+ tts_model = ChatterboxTTS.from_pretrained(device=device)
+
+ # Voice options must be defined after the TTS model is initialized
+ DEFAULT_VOICE = None
+ VOICE_OPTIONS = {
+     "Default Voice": None,
+     "Angela Aguilar": str(Path("/Users/martinrivera/dl_yt_vid_or_aud/#ADosDeBorrarte 💚 [NyhOLU-5LAo].mp4").resolve()),
+     "Bad Gyal": str(Path("/Users/martinrivera/dl_yt_vid_or_aud/Bad Gyal habla un poquito sobre su look | Premio Lo Nuestro 2024 [G38hDOFRROQ].mp4").resolve()),
+     "Becky G": str(Path("/Users/martinrivera/dl_yt_vid_or_aud/Becky G: look estrella con cat eye definido y labio brillante | Secretos de Belleza | VOGUE España [LbRFHczCWwE].mp4").resolve()),
+     "Corina Smith": str(Path("/Users/martinrivera/tiktok/corinasmith_7447217337046011167.mp4").resolve()),
+     "Emilia": str(Path("/Users/martinrivera/dl_yt_vid_or_aud/Emilia: ¿Es más de gloss o labial? [Bcc-F2PfT_k].mp4").resolve()),
+     "Fariana": str(Path("/Users/martinrivera/dl_yt_vid_or_aud/Me Muevo Fariana #memuevo #dance #viralvideo #dance #music #viralshorts #reels [zik7N2QJFAQ].mp4").resolve()),
+     "Karol G": str(Path("/Users/martinrivera/dl_yt_vid_or_aud/¿Qué hay en el bolso de Karol G? | VOGUE España [hC0tnRAnzMQ].mp4").resolve()),
+     "Kenia Os": str(Path("/Users/martinrivera/dl_yt_vid_or_aud/🩷 [EAgBzkYNamQ].mp4").resolve()),
+     "La Joaqui": str(Path("/Users/martinrivera/dl_yt_vid_or_aud/✨✨✨✨ [8e-p775v_FY].mp4").resolve()),
+     "Maria Becerra": str(Path("/Users/martinrivera/dl_yt_vid_or_aud/Mariah Angeliq Cantando \"BOBO\" En Acapella | #lacasadelartista [2Ao_3awg0Z0].mp4").resolve()),
+     "Mariah Angeliq": str(Path("/Users/martinrivera/dl_yt_vid_or_aud/Mariah Angeliq \"LA TOXICA\" Part 2 #ToxicaTips #ToxicaTuesday #lacasadelartista [FOE1oYTeMUs].mp4").resolve()),
+     "Naty Peluso": str(Path("/Users/martinrivera/dl_yt_vid_or_aud/#nathypeluso ganó 4 Latin Grammys con vestido vintage [dOVldgGVsmI].mp4").resolve()),
+     "Nicki Nicole": str(Path("/Users/martinrivera/tiktok/nicki.nicole_7302167478241676549.mp4").resolve()),
+     "Paloma Mami": str(Path("/Users/martinrivera/dl_yt_vid_or_aud/Paloma_Mami_Interview_Trimmed.mp4").resolve()),
+     "Young Miko": str(Path("/Users/martinrivera/dl_yt_vid_or_aud/WASSUP 🫶🏼🫶🏼🫶🏼 [irbZzi3qmqY].mp4").resolve())
+ }
+
+ # Verify voice files exist
+ for name, path in VOICE_OPTIONS.items():
+     if path and not Path(path).exists():
+         logger.warning(f"Voice file {name} not found at {path}")
+
+
+ class TwoLinkArm:
+     def __init__(self, joint_angles=(0, 0)):  # tuple default avoids a shared mutable default argument
+         self.shoulder = np.array([0, 0])
+         self.link_lengths = [1, 1]
+         self.update_joints(list(joint_angles))
+
+     def update_joints(self, joint_angles):
+         self.joint_angles = joint_angles
+         self.forward_kinematics()
+
+     def forward_kinematics(self):
+         theta0 = self.joint_angles[0]
+         theta1 = self.joint_angles[1]
+         l0 = self.link_lengths[0]
+         l1 = self.link_lengths[1]
+
+         # Elbow: shoulder + (l0*cos(theta0), l0*sin(theta0))
+         self.elbow = self.shoulder + np.array([l0*np.cos(theta0), l0*np.sin(theta0)])
+         # Wrist: elbow + (l1*cos(theta0+theta1), l1*sin(theta0+theta1))
+         self.wrist = self.elbow + np.array([l1*np.cos(theta0 + theta1), l1*np.sin(theta0 + theta1)])
+
+     def get_plotly_figure(self):
+         fig = go.Figure()
+
+         fig.add_trace(go.Scatter(
+             x=[self.shoulder[0], self.elbow[0], self.wrist[0]],  # x-coordinates for shoulder, elbow, and wrist
+             y=[self.shoulder[1], self.elbow[1], self.wrist[1]],  # y-coordinates for shoulder, elbow, and wrist
+             mode='lines+markers',
+             line=dict(color='red', width=10),
+             marker=dict(size=12, color=['black', 'blue', 'green']),
+             name='Arm'
+         ))
+
+         max_range = 2.2
+         fig.update_xaxes(range=[-max_range, max_range])
+         fig.update_yaxes(range=[-max_range, max_range])
+
+         annotations = [
+             dict(x=self.shoulder[0], y=self.shoulder[1], text="Shoulder", showarrow=True),
+             dict(x=self.elbow[0], y=self.elbow[1], text="Elbow", showarrow=True),
+             dict(x=self.wrist[0], y=self.wrist[1], text="Wrist", showarrow=True)
+         ]
+
+         fig.update_layout(
+             title='Two-Link Robotic Arm Visualization',
+             xaxis_title='X Position',
+             yaxis_title='Y Position',
+             showlegend=False,
+             height=500,
+             width=500,
+             margin=dict(l=20, r=20, t=40, b=20),
+             annotations=annotations
+         )
+
+         fig.update_yaxes(scaleanchor="x", scaleratio=1)
+
+         return fig
+
+ def calculate_angles(theta0_deg, theta1_deg):
+     theta0 = math.radians(theta0_deg)
+     theta1 = math.radians(theta1_deg)
+
+     # Angle-sum identities: ΔX = cos(θ0+θ1), ΔY = sin(θ0+θ1)
+     delta_x = math.cos(theta0) * math.cos(theta1) - math.sin(theta0) * math.sin(theta1)
+     delta_y = math.sin(theta0) * math.cos(theta1) + math.cos(theta0) * math.sin(theta1)
+
+     calculated_angle_rad = math.atan2(delta_y, delta_x)
+     calculated_angle_deg = math.degrees(calculated_angle_rad)
+
+     # Recover each angle from the combined angle as a consistency check
+     theta0_validation_deg = (calculated_angle_deg - theta1_deg) % 360
+
+     theta1_validation_deg = (calculated_angle_deg - theta0_deg) % 360
+
+     # Joint positions assuming unit link lengths (l0 = l1 = 1)
+     elbow_x = math.cos(theta0)
+     elbow_y = math.sin(theta0)
+
+     wrist_x = elbow_x + math.cos(theta0 + theta1)
+     wrist_y = elbow_y + math.sin(theta0 + theta1)
+
+     wrist_elbow_dx = wrist_x - elbow_x
+     wrist_elbow_dy = wrist_y - elbow_y
+
+     return (
+         delta_x, delta_y,
+         calculated_angle_deg,
+         theta0_validation_deg,
+         theta1_validation_deg,
+         elbow_x, elbow_y,
+         wrist_x, wrist_y,
+         wrist_elbow_dx, wrist_elbow_dy
+     )
+
+
+ def clean_llm_response(text: str) -> str:
+     """Clean up the LLM response to make it more concise."""
+     text = re.sub(r'(?i)(wait|okay|let me|i think|i need to|this answer is).*?(?=[A-Z])', '', text)
+     sentences = [s.strip() for s in re.split(r'[.!?]', text) if s.strip()]
+     return '. '.join(sentences[:3]) + ('.' if sentences else '')
+
+ # Load the LLM model once when the app starts
+ llm_config = GenerationConfig()
+ llm_config.max_tokens = 150
+ llm_config.temperature = 0.3  # note: not currently consumed by generate_text in smolLM3.py
+ model_data = load_model_with_config("HuggingFaceTB/SmolLM3-3B")
+ llm_model, llm_tokenizer = model_data["model"], model_data["tokenizer"]
+
+ def get_llm_insight(theta0_deg, theta1_deg, voice_option=None):
+     # Spanish prompt kept verbatim: this app (app_es.py) answers in Spanish
+     prompt = f"""Proporciona una explicación concisa (2-3 oraciones) de un brazo robótico de dos eslabones con:
+ - Ángulo de hombro (θ₀) = {theta0_deg}°
+ - Ángulo de codo (θ₁) = {theta1_deg}°
+
+ Incluye:
+ 1. La configuración resultante
+ 2. Una aplicación práctica
+ 3. La relación matemática clave
+
+ Responde solo con la explicación en español, sin prefijos ni comentarios."""
+     # Text generation with SmolLM3-3B
+     result = generate_text(prompt, llm_model, llm_tokenizer, llm_config)
+     clean_text = clean_llm_response(result['generated_text'])
+
+     # Audio generation with TTS
+     audio_prompt_path = VOICE_OPTIONS.get(voice_option, DEFAULT_VOICE)
+
+     audio = tts_model.generate(
+         text=clean_text,
+         audio_prompt_path=audio_prompt_path,
+         exaggeration=0.25,
+         cfg_weight=0.8
+     )
+
+     # Save the audio to a temporary file
+     temp_audio_path = "temp_llm_response.wav"
+     ta.save(temp_audio_path, audio, tts_model.sr)
+
+     return clean_text, temp_audio_path
+
+ def process_lipsync(visual_file, audio_file):
+     device = "cpu"  # Use CPU for lipsync for compatibility
+     logger.info(f"Using device: {device}")
+
+     # Create the output directory if it does not exist
+     os.makedirs('static', exist_ok=True)
+
+     # Generate a unique output filename
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     output_path = f'static/output_{timestamp}.mp4'
+
+     try:
+         lip = LipSync(
+             model='wav2lip',
+             checkpoint_path='/Users/martinrivera/robotic_arm/lipsync/weights/wav2lip_gan.pth',
+             nosmooth=True,
+             device='cpu',
+             cache_dir='/Users/martinrivera/robotic_arm/lipsync/cache',
+             img_size=96,
+             save_cache=True
+         )
+
+         logger.info("Starting lip sync process...")
+
+         if visual_file is None or audio_file is None:
+             raise ValueError("Both visual and audio files are required")
+
+         lip.sync(
+             visual_file.name,  # Path to the uploaded visual file
+             audio_file.name,   # Path to the uploaded audio file
+             output_path
+         )
+         logger.info("Lip sync completed successfully!")
+
+         return output_path
+
+     except Exception as e:
+         logger.error(f"Error during lip sync: {str(e)}", exc_info=True)
+         raise gr.Error(f"Lip sync failed: {str(e)}")
+
+ def update_components(theta0_deg, theta1_deg, voice_option):
+     theta0_rad = math.radians(float(theta0_deg))
+     theta1_rad = math.radians(float(theta1_deg))
+
+     arm = TwoLinkArm([theta0_rad, theta1_rad])
+     fig = arm.get_plotly_figure()
+
+     (
+         delta_x, delta_y,
+         calculated_angle_deg,
+         theta0_validation_deg,
+         theta1_validation_deg,
+         elbow_x, elbow_y,
+         wrist_x, wrist_y,
+         wrist_elbow_dx, wrist_elbow_dy
+     ) = calculate_angles(float(theta0_deg), float(theta1_deg))
+
+     results = [
+         f"{delta_x:.4f}",
+         f"{delta_y:.4f}",
+         f"{calculated_angle_deg:.1f}°",
+         f"{theta0_validation_deg:.1f}°",
+         f"{theta1_validation_deg:.1f}°",
+         f"{elbow_x:.4f}",
+         f"{elbow_y:.4f}",
+         f"{wrist_x:.4f}",
+         f"{wrist_y:.4f}",
+         f"{wrist_elbow_dx:.4f} (Expected: {math.cos(theta0_rad + theta1_rad):.4f})",
+         f"{wrist_elbow_dy:.4f} (Expected: {math.sin(theta0_rad + theta1_rad):.4f})",
+     ]
+
+     # Get the LLM insight and audio
+     llm_response, audio_path = get_llm_insight(float(theta0_deg), float(theta1_deg), voice_option)
+
+     return [fig] + results + [llm_response, audio_path, gr.Row(visible=True)]
+
+ theta_options = [(str(i) + "°", i) for i in range(0, 361)]
+
+ with gr.Blocks() as app:
+     gr.Markdown("""
+ # Robotic Arm Angle Calculator with LLM Insights
+ ### Equations:
+ 1. **cos(θ₀ + θ₁) = cosθ₀cosθ₁ - sinθ₀sinθ₁** → Delta X
+ 2. **sin(θ₀ + θ₁) = sinθ₀cosθ₁ + cosθ₀sinθ₁** → Delta Y
+ 3. **θ₀ + θ₁ = arctan2(ΔY, ΔX)** → Validates θ₁
+ 4-7. **Elbow/Wrist positions** (assuming l₀ = l₁ = 1)
+ """)
+
+     with gr.Row():
+         theta0_dropdown = gr.Dropdown(
+             choices=theta_options,
+             value=45,
+             label="θ₀ (Shoulder Angle)",
+             interactive=True,
+             allow_custom_value=True
+         )
+         theta1_dropdown = gr.Dropdown(
+             choices=theta_options,
+             value=60,
+             label="θ₁ (Elbow Angle)",
+             interactive=True,
+             allow_custom_value=True
+         )
+         voice_dropdown = gr.Dropdown(
+             choices=list(VOICE_OPTIONS.keys()),
+             value="Default Voice",
+             label="Select Voice",
+             interactive=True
+         )
+
+     calculate_btn = gr.Button("Calculate", variant="primary")
+
+     with gr.Row():
+         with gr.Column(scale=3):
+             gr.Markdown("### Arm Visualization")
+             plot = gr.Plot(label="Arm Position")
+
+         with gr.Column(scale=1):
+             gr.Markdown("### Delta Values")
+             delta_x = gr.Textbox(label="ΔX (cos(θ₀+θ₁))")
+             delta_y = gr.Textbox(label="ΔY (sin(θ₀+θ₁))")
+             calculated_angle = gr.Textbox(label="θ₀ + θ₁")
+
+         with gr.Column(scale=1):
+             gr.Markdown("### Joint Positions")
+             elbow_x = gr.Textbox(label="Elbow X (cosθ₀)")
+             elbow_y = gr.Textbox(label="Elbow Y (sinθ₀)")
+             wrist_x = gr.Textbox(label="Wrist X")
+             wrist_y = gr.Textbox(label="Wrist Y")
+             wrist_elbow_dx = gr.Textbox(label="Wrist-Elbow ΔX")
+             wrist_elbow_dy = gr.Textbox(label="Wrist-Elbow ΔY")
+
+         with gr.Column(scale=1):
+             gr.Markdown("### Validation")
+             theta0_validation = gr.Textbox(label="Validated θ₀")
+             theta1_validation = gr.Textbox(label="Validated θ₁")
+
+     with gr.Row():
+         with gr.Column():
+             gr.Markdown("### LLM Explanation")
+             llm_output = gr.Textbox(label="Arm Configuration Analysis", lines=3)
+             audio_output = gr.Audio(label="Audio Explanation", type="filepath", visible=True)
+
+     # This row is hidden initially and shown after calculations
+     with gr.Row(visible=False) as lipsync_row:
+         with gr.Column():
+             gr.Markdown("### Lip Sync Options")
+             lipsync_choice = gr.Radio(
+                 choices=["Yes", "No"],
+                 label="Would you like to create a lip-synced video of the explanation?",
+                 value="No"
+             )
+             with gr.Column(visible=False) as lipsync_inputs:
+                 visual_input = gr.File(label="Upload Visual File (.jpg or .mp4)", file_types=[".jpg", ".jpeg", ".mp4"])
+                 audio_input = gr.File(label="Upload Audio File (.wav)", file_types=[".wav"])
+                 lipsync_btn = gr.Button("Generate Lip Sync", variant="primary")
+             lipsync_output = gr.Video(label="Lip Synced Output", height=512, width=512)
+             lipsync_message = gr.Textbox(label="Message", visible=False)
+
+     # Main calculation button
+     calculate_btn.click(
+         fn=update_components,
+         inputs=[theta0_dropdown, theta1_dropdown, voice_dropdown],
+         outputs=[
+             plot,
+             delta_x, delta_y,
+             calculated_angle,
+             theta0_validation,
+             theta1_validation,
+             elbow_x, elbow_y,
+             wrist_x, wrist_y,
+             wrist_elbow_dx, wrist_elbow_dy,
+             llm_output,
+             audio_output,
+             lipsync_row
+         ]
+     )
+
+     # Show/hide lipsync inputs based on the choice
+     def toggle_lipsync_inputs(choice):
+         if choice == "Yes":
+             return gr.Column(visible=True), gr.Textbox(visible=False), gr.Video(visible=False)
+         return gr.Column(visible=False), gr.Textbox(visible=False), gr.Video(visible=False)
+
+     lipsync_choice.change(
+         fn=toggle_lipsync_inputs,
+         inputs=lipsync_choice,
+         outputs=[lipsync_inputs, lipsync_message, lipsync_output]
+     )
+
+     # Handle lipsync generation
+     def handle_lipsync(choice, visual_file, audio_file):
+         if choice == "No":
+             return gr.Textbox(value="Continue playing with your robotic arm!", visible=True), gr.Video(visible=False)
+         else:
+             try:
+                 output_path = process_lipsync(visual_file, audio_file)
+                 return gr.Textbox(visible=False), gr.Video(value=output_path, visible=True)
+             except Exception as e:
+                 return gr.Textbox(value=f"Error: {str(e)}", visible=True), gr.Video(visible=False)
+
+     lipsync_btn.click(
+         fn=handle_lipsync,
+         inputs=[lipsync_choice, visual_input, audio_input],
+         outputs=[lipsync_message, lipsync_output]
+     )
+
+ app.launch()
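
Editor's note: equations 1-3 in the in-app Markdown are the standard angle-sum identities, which is why calculate_angles can recover θ₀ + θ₁ from ΔX and ΔY via atan2. A minimal standalone sketch of that check, independent of the app (standard library only; the tolerances are arbitrary choices, not part of the app):

import math

def check_identities(theta0_deg: float, theta1_deg: float, tol: float = 1e-9) -> bool:
    """Confirm the angle-sum identities used by calculate_angles in app_es.py."""
    t0 = math.radians(theta0_deg)
    t1 = math.radians(theta1_deg)
    delta_x = math.cos(t0) * math.cos(t1) - math.sin(t0) * math.sin(t1)
    delta_y = math.sin(t0) * math.cos(t1) + math.cos(t0) * math.sin(t1)
    # Both sides should agree with the direct evaluation of cos/sin(θ0+θ1)
    ok_x = abs(delta_x - math.cos(t0 + t1)) < tol
    ok_y = abs(delta_y - math.sin(t0 + t1)) < tol
    # atan2 recovers the combined angle, modulo 360°
    recovered_deg = math.degrees(math.atan2(delta_y, delta_x)) % 360
    ok_angle = abs(recovered_deg - (theta0_deg + theta1_deg) % 360) < 1e-6
    return ok_x and ok_y and ok_angle

assert check_identities(45, 60)    # the app's default dropdown values
assert check_identities(300, 120)  # wraps past 360°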
Gradio App Ver 1/install.sh ADDED
@@ -0,0 +1,116 @@
+ #!/bin/bash
+
+ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+ # Ensure Python 3.10 is available
+ install_python3_10() {
+     if ! command -v python3.10 &> /dev/null; then
+         echo "Python 3.10 not found. Attempting to install..."
+         if [[ "$OSTYPE" == "darwin"* ]]; then
+             brew install python@3.10
+         elif [[ "$OSTYPE" == "linux-gnu"* ]]; then
+             sudo apt-get update
+             sudo apt-get install -y python3.10 python3.10-venv
+         else
+             echo "Unsupported OS. Please install Python 3.10 manually."
+             exit 1
+         fi
+     fi
+ }
+ install_python3_10
+
+ # Create both environments with Python 3.10
+ echo "Setting up Python environments..."
+ python3.10 -m venv "$SCRIPT_DIR/env"
+ python3.10 -m venv "$SCRIPT_DIR/lipsync_env"
+
+ # Install system dependencies
+ echo "Installing system dependencies..."
+ if [[ "$OSTYPE" == "darwin"* ]]; then
+     if ! command -v brew &> /dev/null; then
+         echo "Installing Homebrew..."
+         /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
+     fi
+     brew install ffmpeg
+ elif [[ "$OSTYPE" == "linux-gnu"* ]]; then
+     sudo apt-get update && sudo apt-get install -y ffmpeg
+ fi
+
+ # Modified safe_install function
+ safe_install() {
+     local env_path="$1"
+     local requirements_file="$2"
+
+     echo "Installing packages in $env_path from $requirements_file..."
+     source "$env_path/bin/activate"
+
+     pip install --upgrade pip
+
+     # Install critical packages with exact versions
+     pip install \
+         torch==2.7.1 \
+         transformers==4.53.1 \
+         tokenizers==0.21.2 \
+         gradio==5.36.2 \
+         numpy==2.2.6 \
+         plotly==5.18.0 \
+         mlx==0.26.3 \
+         mlx-lm==0.26.0
+
+     # Install other requirements (excluding already installed packages)
+     local temp_file
+     temp_file=$(mktemp)
+     trap "rm -f '$temp_file'" EXIT
+
+     grep -v -E "^(torch|transformers|tokenizers|gradio|mlx|mlx-lm|numpy|plotly)" \
+         "$SCRIPT_DIR/$requirements_file" > "$temp_file"
+
+     if [ -s "$temp_file" ]; then
+         pip install -r "$temp_file"
+     fi
+
+     # Special case for lipsync_env
+     if [[ "$env_path" == *"lipsync_env"* ]]; then
+         pip install av==14.4.0 opencv-python==4.12.0.88
+     fi
+
+     deactivate
+ }
+
+ # Install packages
+ safe_install "$SCRIPT_DIR/env" "installed_packages_env.txt"
+ safe_install "$SCRIPT_DIR/lipsync_env" "installed_packages_lipsync_env.txt"
+
+ # Clone and install Chatterbox
+ echo "Installing Chatterbox TTS..."
+ if [ ! -d "$SCRIPT_DIR/chatterbox" ]; then
+     git clone https://github.com/resemble-ai/chatterbox.git "$SCRIPT_DIR/chatterbox"
+ fi
+ cd "$SCRIPT_DIR/chatterbox" || exit
+ "$SCRIPT_DIR/env/bin/pip" install -e .
+ cd "$SCRIPT_DIR" || exit
+
+ # Clone and install LipSync
+ echo "Installing LipSync..."
+ if [ ! -d "$SCRIPT_DIR/lipsync" ]; then
+     git clone https://github.com/mowshon/lipsync.git "$SCRIPT_DIR/lipsync"
+ fi
+ cd "$SCRIPT_DIR/lipsync" || exit
+ "$SCRIPT_DIR/lipsync_env/bin/pip" install -e .
+ cd "$SCRIPT_DIR" || exit
+
+ # Download Wav2Lip model weights
+ echo "Downloading Wav2Lip model weights..."
+ mkdir -p "$SCRIPT_DIR/lipsync/weights"
+ if ! command -v wget &> /dev/null; then
+     curl -L https://github.com/mowshon/lipsync/releases/download/v0.1.0/wav2lip_gan.pth -o "$SCRIPT_DIR/lipsync/weights/wav2lip_gan.pth"
+ else
+     wget https://github.com/mowshon/lipsync/releases/download/v0.1.0/wav2lip_gan.pth -O "$SCRIPT_DIR/lipsync/weights/wav2lip_gan.pth"
+ fi
+
+ # Create cache directory for lipsync
+ mkdir -p "$SCRIPT_DIR/lipsync/cache"
+
+ echo "Installation complete!"
+ echo "To use the main environment: source $SCRIPT_DIR/env/bin/activate"
+ echo "To use the lipsync environment: source $SCRIPT_DIR/lipsync_env/bin/activate"
Gradio App Ver 1/installed_packages_env.txt ADDED
@@ -0,0 +1,59 @@
+ audioread==3.0.1
+ certifi==2025.7.9
+ cffi==1.17.1
+ cfgv==3.4.0
+ charset-normalizer==3.4.2
+ -e git+https://github.com/resemble-ai/chatterbox.git@eb90621fa748f341a5b768aed0c0c12fc561894b#egg=chatterbox_tts
+ conformer==0.3.2
+ decorator==5.2.1
+ diffusers==0.29.0
+ distlib==0.3.9
+ einops==0.8.1
+ filelock==3.18.0
+ fsspec==2025.5.1
+ hf-xet==1.1.5
+ huggingface-hub==0.33.4
+ identify==2.6.12
+ idna==3.10
+ importlib_metadata==8.7.0
+ Jinja2==3.1.6
+ joblib==1.5.1
+ lazy_loader==0.4
+ librosa==0.11.0
+ llvmlite==0.43.0
+ MarkupSafe==3.0.2
+ mpmath==1.3.0
+ msgpack==1.1.1
+ networkx==3.2.1
+ nodeenv==1.9.1
+ numba==0.60.0
+ numpy==2.0.2
+ onnx==1.18.0
+ packaging==25.0
+ pillow==11.3.0
+ platformdirs==4.3.8
+ pooch==1.8.2
+ pre_commit==4.2.0
+ protobuf==6.31.1
+ pycparser==2.22
+ PyYAML==6.0.2
+ regex==2024.11.6
+ requests==2.32.4
+ resemble-perth==1.0.1
+ s3tokenizer==0.2.0
+ safetensors==0.5.3
+ scikit-learn==1.6.1
+ scipy==1.13.1
+ soundfile==0.13.1
+ soxr==0.5.0.post1
+ sympy==1.13.1
+ threadpoolctl==3.6.0
+ tokenizers==0.20.3
+ torch==2.6.0
+ torchaudio==2.6.0
+ tqdm==4.67.1
+ transformers==4.46.3
+ typing_extensions==4.14.1
+ urllib3==2.5.0
+ virtualenv==20.31.2
+ zipp==3.23.0
Gradio App Ver 1/installed_packages_lipsync_env.txt ADDED
@@ -0,0 +1,91 @@
+ aiofiles==24.1.0
+ annotated-types==0.7.0
+ anyio==4.9.0
+ audioread==3.0.1
+ av==14.4.0
+ Brotli==1.1.0
+ certifi==2025.7.9
+ cffi==1.17.1
+ charset-normalizer==3.4.2
+ click==8.2.1
+ decorator==5.2.1
+ exceptiongroup==1.3.0
+ face-alignment==1.4.1
+ fastapi==0.116.1
+ ffmpy==0.6.0
+ filelock==3.18.0
+ fsspec==2025.5.1
+ gradio==5.36.2
+ gradio_client==1.10.4
+ groovy==0.1.2
+ h11==0.16.0
+ hf-xet==1.1.5
+ httpcore==1.0.9
+ httpx==0.28.1
+ huggingface-hub==0.33.4
+ idna==3.10
+ imageio==2.37.0
+ Jinja2==3.1.6
+ joblib==1.5.1
+ lazy_loader==0.4
+ librosa==0.10.2.post1
+ -e git+https://github.com/mowshon/lipsync.git@d437451cb02375c165b708b199184f1aa90db5d3#egg=lipsync
+ llvmlite==0.44.0
+ markdown-it-py==3.0.0
+ MarkupSafe==3.0.2
+ mdurl==0.1.2
+ mlx==0.26.3
+ mlx-lm==0.26.0
+ mpmath==1.3.0
+ msgpack==1.1.1
+ networkx==3.4.2
+ numba==0.61.2
+ numpy==2.2.6
+ opencv-python==4.12.0.88
+ orjson==3.10.18
+ packaging==25.0
+ pandas==2.3.1
+ pillow==11.3.0
+ platformdirs==4.3.8
+ pooch==1.8.2
+ protobuf==6.31.1
+ pycparser==2.22
+ pydantic==2.11.7
+ pydantic_core==2.33.2
+ pydub==0.25.1
+ Pygments==2.19.2
+ python-dateutil==2.9.0.post0
+ python-multipart==0.0.20
+ pytz==2025.2
+ PyYAML==6.0.2
+ regex==2024.11.6
+ requests==2.32.4
+ rich==14.0.0
+ ruff==0.12.3
+ safehttpx==0.1.6
+ safetensors==0.5.3
+ scikit-image==0.25.2
+ scikit-learn==1.7.0
+ scipy==1.15.3
+ semantic-version==2.10.0
+ shellingham==1.5.4
+ six==1.17.0
+ sniffio==1.3.1
+ soundfile==0.13.1
+ soxr==0.5.0.post1
+ starlette==0.47.1
+ sympy==1.14.0
+ threadpoolctl==3.6.0
+ tifffile==2025.5.10
+ tokenizers==0.21.2
+ tomlkit==0.13.3
+ torch==2.7.1
+ tqdm==4.67.1
+ transformers==4.53.1
+ typer==0.16.0
+ typing-inspection==0.4.1
+ typing_extensions==4.14.1
+ tzdata==2025.2
+ urllib3==2.5.0
+ uvicorn==0.35.0
+ websockets==15.0.1
Gradio App Ver 1/smolLM3.py ADDED
@@ -0,0 +1,112 @@
+ from mlx_lm import load, generate
+ import time
+ import mlx.core as mx
+ from mlx.utils import tree_unflatten, tree_flatten
+ from typing import Any
+ import importlib.metadata  # Modern replacement for pkg_resources
+
+ # Configuration
+ MODEL_NAME = "HuggingFaceTB/SmolLM3-3B"
+ DTYPE = mx.float16
+
+ class GenerationConfig:
+     def __init__(self):
+         self.max_tokens = 150  # Conservative for a 3B model
+         self.seed = 42
+         self.temperature = None  # assigned by callers (e.g. app_es.py); not yet wired into generate()
+
+ def load_model_with_config(model_name: str) -> dict:
+     """Load the model with configuration options."""
+     start_time = time.time()
+
+     print(f"\nLoading model {model_name}...")
+     model, tokenizer = load(model_name)
+
+     # Convert model parameters to the specified dtype
+     model.update(tree_unflatten([
+         (k, v.astype(DTYPE)) for k, v in tree_flatten(model.parameters())
+     ]))
+
+     load_time = time.time() - start_time
+     print(f"\nModel Information:")
+     print(f"- Load time: {load_time:.2f} seconds")
+     print(f"- Device: {'GPU' if mx.metal.is_available() else 'CPU'}")
+     print(f"- Precision: {DTYPE}")
+     print(f"- Vocab size: {tokenizer.vocab_size:,}")
+
+     return {"model": model, "tokenizer": tokenizer}
+
+ def generate_text(
+     prompt: str,
+     model: Any,
+     tokenizer: Any,
+     config: GenerationConfig
+ ) -> dict:
+     """Generate text with basic controls."""
+     try:
+         if config.seed is not None:
+             mx.random.seed(config.seed)
+
+         start_time = time.time()
+
+         response = generate(
+             model=model,
+             tokenizer=tokenizer,
+             prompt=prompt,
+             max_tokens=config.max_tokens,
+             verbose=False
+         )
+
+         generation_time = time.time() - start_time
+         num_tokens = len(tokenizer.encode(response))
+         tokens_per_sec = num_tokens / generation_time
+
+         return {
+             "generated_text": response,
+             "tokens_generated": num_tokens,
+             "generation_time": generation_time,
+             "tokens_per_second": tokens_per_sec
+         }
+
+     except Exception as e:
+         print(f"\nGeneration error: {str(e)}")
+         raise
+
+ def check_versions():
+     """Check package versions using modern importlib."""
+     try:
+         mlx_version = importlib.metadata.version("mlx")
+         mlx_lm_version = importlib.metadata.version("mlx-lm")
+         print(f"\nPackage Versions:")
+         print(f"- MLX: {mlx_version}")
+         print(f"- MLX-LM: {mlx_lm_version}")
+     except Exception as e:
+         print(f"\nVersion check error: {str(e)}")
+
+ if __name__ == "__main__":
+     print(f"Metal (GPU) available: {mx.metal.is_available()}")
+     check_versions()
+
+     try:
+         config = GenerationConfig()
+         model_data = load_model_with_config(MODEL_NAME)
+         model, tokenizer = model_data["model"], model_data["tokenizer"]
+
+         prompt = "Did any famous historical events happen on July 11th?"
+         print(f"\nGenerating response with max_tokens={config.max_tokens}...")
+         result = generate_text(prompt, model, tokenizer, config)
+
+         print("\n=== Generation Results ===")
+         print(f"\nPrompt: {prompt}")
+         print(f"\nGenerated ({result['tokens_generated']} tokens, {result['generation_time']:.2f}s):")
+         print("-" * 50)
+         print(result['generated_text'])
+         print("-" * 50)
+         print(f"\nSpeed: {result['tokens_per_second']:.1f} tokens/second")
+
+     except Exception as e:
+         print(f"\nError: {str(e)}")
+         print("\nTroubleshooting:")
+         print(f"- Try reducing max_tokens (currently {config.max_tokens})")
+         print("- Check GPU memory usage (Activity Monitor)")
+         print("- Update packages: pip install --upgrade mlx mlx-lm")
Gradio App Ver 1/wav2lip.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dc5b324a04a0e5b150a97422b68b79859e993e1fc1a3b4b87e2fd4a07cfd2e7a
+ size 145394162
Gradio App Ver 1/wav2lip_files_in_weights_folder.txt ADDED
@@ -0,0 +1,3 @@
+ # Place wav2lip.pth inside a folder named weights
+ # Place wav2lip_gan.pth inside a folder named weights
+ # mkdir cache static source weights
Gradio App Ver 1/wav2lip_gan.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dce91e8d0d533c6b53192ca41bbfe55b0dbb1859d1a29612e5bc61f5aa71c1e5
+ size 145396098
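
Editor's note: both .pth entries above are Git LFS pointer files, not the checkpoints themselves: the oid line carries the SHA-256 of the real file and size its byte count. A small sketch for verifying a downloaded checkpoint against its pointer (standard library only; the checkpoint path is illustrative):

import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file so a ~145 MB checkpoint never sits fully in memory."""
    digest = hashlib.sha256()
    with path.open("rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Values copied from the wav2lip_gan.pth pointer above
expected_oid = "dce91e8d0d533c6b53192ca41bbfe55b0dbb1859d1a29612e5bc61f5aa71c1e5"
expected_size = 145396098

ckpt = Path("lipsync/weights/wav2lip_gan.pth")  # illustrative path
assert ckpt.stat().st_size == expected_size, "size mismatch - partial download?"
assert sha256_of(ckpt) == expected_oid, "checksum mismatch - corrupted download?"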
tiktok/dl_tiktok.py ADDED
@@ -0,0 +1,37 @@
+ import argparse
+ import yt_dlp
+
+ def download_tiktok(url, download_audio_only=False):
+     try:
+         ydl_opts = {
+             'outtmpl': '%(uploader)s_%(id)s.%(ext)s',
+             'quiet': False,
+         }
+
+         if download_audio_only:
+             ydl_opts['format'] = 'bestaudio/best'
+             ydl_opts['postprocessors'] = [{
+                 'key': 'FFmpegExtractAudio',
+                 'preferredcodec': 'mp3',
+             }]
+
+         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+             ydl.download([url])
+
+         return True
+     except Exception as e:
+         print(f"Error: {str(e)}")
+         return False
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description='Download TikTok videos or audio.')
+     parser.add_argument('url', help='TikTok video URL')
+     parser.add_argument('--audio', action='store_true', help='Download audio only')
+
+     args = parser.parse_args()
+
+     if not args.url.startswith('https://www.tiktok.com/'):
+         print("Error: Please provide a valid TikTok URL")
+     else:
+         download_tiktok(args.url, args.audio)
tiktok/dl_tiktok2.py ADDED
@@ -0,0 +1,81 @@
+ import argparse
+ import yt_dlp
+ import os
+
+ def download_tiktok(url, download_audio_only=False, output_dir=None):
+     """
+     Download a TikTok video or audio track using yt-dlp.
+
+     Args:
+         url (str): TikTok video URL
+         download_audio_only (bool): Whether to download only the audio
+         output_dir (str): Custom output directory (None for the current directory)
+     """
+     try:
+         # Set the output template
+         output_template = os.path.join(output_dir, '%(uploader)s_%(id)s.%(ext)s') if output_dir else '%(uploader)s_%(id)s.%(ext)s'
+
+         ydl_opts = {
+             'outtmpl': output_template,
+             'quiet': False,
+             'no_warnings': False,
+             'ignoreerrors': False,
+             'restrictfilenames': True,
+         }
+
+         if download_audio_only:
+             ydl_opts.update({
+                 'format': 'bestaudio/best',
+                 'postprocessors': [{
+                     'key': 'FFmpegExtractAudio',
+                     'preferredcodec': 'mp3',
+                     'preferredquality': '192',
+                 }],
+                 'extractaudio': True,  # Only keep the audio
+             })
+         else:
+             ydl_opts.update({
+                 'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
+                 'merge_output_format': 'mp4',
+             })
+
+         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+             info_dict = ydl.extract_info(url, download=True)
+
+             # Get the actual filename that was saved
+             if download_audio_only:
+                 actual_filename = ydl.prepare_filename(info_dict).replace('.webm', '.mp3').replace('.m4a', '.mp3')
+             else:
+                 actual_filename = ydl.prepare_filename(info_dict)
+
+         print(f"\nSuccessfully downloaded: {actual_filename}")
+         return True, actual_filename
+
+     except yt_dlp.utils.DownloadError as e:
+         print(f"\nDownload Error: {str(e)}")
+         return False, None
+     except Exception as e:
+         print(f"\nUnexpected Error: {str(e)}")
+         return False, None
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description='Download TikTok videos or audio using yt-dlp (most reliable method)')
+     parser.add_argument('url', help='TikTok video URL')
+     parser.add_argument('--audio', action='store_true', help='Download audio only (MP3 format)')
+     parser.add_argument('--output', '-o', help='Custom output directory (default: current directory)')
+
+     args = parser.parse_args()
+
+     if not args.url.startswith(('https://www.tiktok.com/', 'https://tiktok.com/')):
+         print("Error: Please provide a valid TikTok URL")
+     else:
+         success, filename = download_tiktok(
+             args.url,
+             download_audio_only=args.audio,
+             output_dir=args.output
+         )
+
+         if not success:
+             print("Failed to download the TikTok content")
+             exit(1)
tiktok/requirements.txt ADDED
@@ -0,0 +1 @@
+ yt-dlp
tiktok/tiktok.txt ADDED
@@ -0,0 +1,7 @@
+ # Create the tiktok directory
+ # mkdir tiktok
+ # cd tiktok
+ # python3.10 -m venv env
+ # source env/bin/activate
+ # (env) python dl_tiktok2.py "https://www.tiktok.com/@akabadgyal/video/7523743037713911062"
+ # (env) python dl_tiktok2.py "place TikTok URL here"