Spaces:
Runtime error
Runtime error
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
| 1 |
-
# utils.py
|
| 2 |
-
|
| 3 |
import os
|
| 4 |
import re
|
| 5 |
import json
|
|
@@ -15,11 +13,10 @@ import tiktoken
|
|
| 15 |
from groq import Groq
|
| 16 |
import numpy as np
|
| 17 |
import torch
|
| 18 |
-
import random
|
| 19 |
|
| 20 |
class DialogueItem(BaseModel):
|
| 21 |
-
speaker: Literal["Jane", "John"]
|
| 22 |
-
display_speaker: str = "Jane"
|
| 23 |
text: str
|
| 24 |
|
| 25 |
class Dialogue(BaseModel):
|
|
@@ -47,8 +44,7 @@ def truncate_text(text, max_tokens=2048):
|
|
| 47 |
|
| 48 |
def extract_text_from_url(url):
|
| 49 |
"""
|
| 50 |
-
Fetches and extracts readable text from a given URL
|
| 51 |
-
(stripping out scripts, styles, etc.).
|
| 52 |
"""
|
| 53 |
print("[LOG] Extracting text from URL:", url)
|
| 54 |
try:
|
|
@@ -85,8 +81,7 @@ def pitch_shift(audio: AudioSegment, semitones: int) -> AudioSegment:
|
|
| 85 |
|
| 86 |
def is_sufficient(text: str, min_word_count: int = 500) -> bool:
|
| 87 |
"""
|
| 88 |
-
Checks if the fetched text meets our sufficiency criteria
|
| 89 |
-
(e.g., at least 500 words).
|
| 90 |
"""
|
| 91 |
word_count = len(text.split())
|
| 92 |
print(f"[DEBUG] Aggregated word count: {word_count}")
|
|
@@ -98,6 +93,7 @@ def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
|
|
| 98 |
Appends it to our aggregated info if found.
|
| 99 |
"""
|
| 100 |
print("[LOG] Querying LLM for additional information.")
|
|
|
|
| 101 |
system_prompt = (
|
| 102 |
"You are an AI assistant with extensive knowledge up to 2023-10. "
|
| 103 |
"Provide additional relevant information on the following topic based on your knowledge base.\n\n"
|
|
@@ -105,7 +101,9 @@ def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
|
|
| 105 |
f"Existing Information: {existing_text}\n\n"
|
| 106 |
"Please add more insightful details, facts, and perspectives to enhance the understanding of the topic."
|
| 107 |
)
|
|
|
|
| 108 |
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
|
|
|
| 109 |
try:
|
| 110 |
response = groq_client.chat.completions.create(
|
| 111 |
messages=[{"role": "system", "content": system_prompt}],
|
|
@@ -113,19 +111,22 @@ def query_llm_for_additional_info(topic: str, existing_text: str) -> str:
|
|
| 113 |
max_tokens=1024,
|
| 114 |
temperature=0.7
|
| 115 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
except Exception as e:
|
| 117 |
print("[ERROR] Groq API error during fallback:", e)
|
| 118 |
return ""
|
| 119 |
-
additional_info = response.choices[0].message.content.strip()
|
| 120 |
-
print("[DEBUG] Additional information from LLM:")
|
| 121 |
-
print(additional_info)
|
| 122 |
-
return additional_info
|
| 123 |
|
| 124 |
def research_topic(topic: str) -> str:
|
| 125 |
"""
|
| 126 |
Gathers info from various RSS feeds and Wikipedia. If needed, queries the LLM
|
| 127 |
for more data if the aggregated text is insufficient.
|
| 128 |
"""
|
|
|
|
| 129 |
sources = {
|
| 130 |
"BBC": "https://feeds.bbci.co.uk/news/rss.xml",
|
| 131 |
"CNN": "http://rss.cnn.com/rss/edition.rss",
|
|
@@ -137,484 +138,472 @@ def research_topic(topic: str) -> str:
|
|
| 137 |
"Google News - Custom": f"https://news.google.com/rss/search?q={requests.utils.quote(topic)}&hl=en-IN&gl=IN&ceid=IN:en",
|
| 138 |
}
|
| 139 |
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
def fetch_wikipedia_summary(topic: str) -> str:
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
|
| 211 |
def fetch_rss_feed(feed_url: str) -> list:
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
|
|
|
|
|
|
|
|
|
| 227 |
|
| 228 |
def find_relevant_article(items, topic: str, min_match=2) -> tuple:
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
|
| 246 |
def fetch_article_text(link: str) -> str:
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
|
| 268 |
def generate_script(
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
):
|
| 278 |
-
|
| 279 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
|
| 281 |
-
|
| 282 |
-
numeric_minutes = 3
|
| 283 |
-
match = re.search(r"(\d+)", target_length)
|
| 284 |
-
if match:
|
| 285 |
-
numeric_minutes = int(match.group(1))
|
| 286 |
|
| 287 |
-
|
| 288 |
-
max_words = numeric_minutes * words_per_minute
|
| 289 |
|
| 290 |
-
|
| 291 |
-
"Humorous": "funny and exciting, makes people chuckle",
|
| 292 |
-
"Formal": "business-like, well-structured, professional",
|
| 293 |
-
"Casual": "like a conversation between close friends, relaxed and informal",
|
| 294 |
-
"Youthful": "like how teenagers might chat, energetic and lively"
|
| 295 |
-
}
|
| 296 |
-
chosen_tone = tone_map.get(tone, "casual")
|
| 297 |
-
|
| 298 |
-
# Determine sponsor instructions based on sponsor_provided and sponsor_style
|
| 299 |
-
if sponsor_provided:
|
| 300 |
-
if sponsor_style == "Separate Break":
|
| 301 |
-
sponsor_instructions = (
|
| 302 |
-
"If sponsor content is provided, include it in a separate ad break (~30 seconds). "
|
| 303 |
-
"Use phrasing like 'Now a word from our sponsor...' and end with 'Back to the show' or similar."
|
| 304 |
-
)
|
| 305 |
-
else:
|
| 306 |
-
sponsor_instructions = (
|
| 307 |
-
"If sponsor content is provided, blend it naturally (~30 seconds) into the conversation. "
|
| 308 |
-
"Avoid abrupt transitions."
|
| 309 |
-
)
|
| 310 |
-
else:
|
| 311 |
-
sponsor_instructions = "" # No sponsor instructions if sponsor_provided is empty
|
| 312 |
-
|
| 313 |
-
prompt = (
|
| 314 |
-
f"{system_prompt}\n"
|
| 315 |
-
f"TONE: {chosen_tone}\n"
|
| 316 |
-
f"TARGET LENGTH: {target_length} (~{min_words}-{max_words} words)\n"
|
| 317 |
-
f"INPUT TEXT: {input_text}\n\n"
|
| 318 |
-
f"# Sponsor Style Instruction:\n{sponsor_instructions}\n\n"
|
| 319 |
-
"Please provide the output in the following JSON format without any additional text:\n\n"
|
| 320 |
-
"{\n"
|
| 321 |
-
' "dialogue": [\n'
|
| 322 |
-
' {\n'
|
| 323 |
-
' "speaker": "Jane",\n'
|
| 324 |
-
' "text": "..." \n'
|
| 325 |
-
' },\n'
|
| 326 |
-
' {\n'
|
| 327 |
-
' "speaker": "John",\n'
|
| 328 |
-
' "text": "..." \n'
|
| 329 |
-
' }\n'
|
| 330 |
-
" ]\n"
|
| 331 |
-
"}"
|
| 332 |
-
)
|
| 333 |
-
print("[LOG] Sending prompt to Groq:")
|
| 334 |
-
print(prompt)
|
| 335 |
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
messages=[{"role": "system", "content": prompt}],
|
| 339 |
-
model="llama-3.3-70b-versatile",
|
| 340 |
-
max_tokens=2048,
|
| 341 |
-
temperature=0.7
|
| 342 |
-
)
|
| 343 |
-
except Exception as e:
|
| 344 |
-
print("[ERROR] Groq API error:", e)
|
| 345 |
-
raise ValueError(f"Error communicating with Groq API: {str(e)}")
|
| 346 |
|
| 347 |
-
|
| 348 |
-
start_index = raw_content.find('{')
|
| 349 |
-
end_index = raw_content.rfind('}')
|
| 350 |
-
if start_index == -1 or end_index == -1:
|
| 351 |
-
raise ValueError("Failed to parse dialogue: No JSON found.")
|
| 352 |
|
| 353 |
-
|
| 354 |
|
| 355 |
-
try:
|
| 356 |
-
data = json.loads(json_str)
|
| 357 |
-
dialogue_list = data.get("dialogue", [])
|
| 358 |
-
|
| 359 |
-
for d in dialogue_list:
|
| 360 |
-
raw_speaker = d.get("speaker", "Jane")
|
| 361 |
-
if raw_speaker.lower() == host_name.lower():
|
| 362 |
-
d["speaker"] = "Jane"
|
| 363 |
-
d["display_speaker"] = host_name
|
| 364 |
-
elif raw_speaker.lower() == guest_name.lower():
|
| 365 |
-
d["speaker"] = "John"
|
| 366 |
-
d["display_speaker"] = guest_name
|
| 367 |
-
else:
|
| 368 |
-
d["speaker"] = "Jane"
|
| 369 |
-
d["display_speaker"] = raw_speaker
|
| 370 |
-
|
| 371 |
-
new_dialogue_items = []
|
| 372 |
-
for d in dialogue_list:
|
| 373 |
-
if "display_speaker" not in d:
|
| 374 |
-
d["display_speaker"] = d["speaker"]
|
| 375 |
-
new_dialogue_items.append(DialogueItem(**d))
|
| 376 |
-
|
| 377 |
-
return Dialogue(dialogue=new_dialogue_items)
|
| 378 |
-
except json.JSONDecodeError as e:
|
| 379 |
-
print("[ERROR] JSON decoding (format) failed:", e)
|
| 380 |
-
raise ValueError(f"Failed to parse dialogue: {str(e)}")
|
| 381 |
-
except Exception as e:
|
| 382 |
-
print("[ERROR] JSON decoding failed:", e)
|
| 383 |
-
raise ValueError(f"Failed to parse dialogue: {str(e)}")
|
| 384 |
-
|
| 385 |
-
def transcribe_youtube_video(video_url: str) -> str:
|
| 386 |
-
print("[LOG] Transcribing YouTube video via RapidAPI:", video_url)
|
| 387 |
-
video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", video_url)
|
| 388 |
-
if not video_id_match:
|
| 389 |
-
raise ValueError(f"Invalid YouTube URL: {video_url}, cannot extract video ID.")
|
| 390 |
-
|
| 391 |
-
video_id = video_id_match.group(1)
|
| 392 |
-
print("[LOG] Extracted video ID:", video_id)
|
| 393 |
-
|
| 394 |
-
base_url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
|
| 395 |
-
params = {
|
| 396 |
-
"video_id": video_id,
|
| 397 |
-
"lang": "en"
|
| 398 |
-
}
|
| 399 |
-
headers = {
|
| 400 |
-
"x-rapidapi-host": "youtube-transcriptor.p.rapidapi.com",
|
| 401 |
-
"x-rapidapi-key": os.environ.get("RAPIDAPI_KEY")
|
| 402 |
-
}
|
| 403 |
|
| 404 |
-
|
| 405 |
-
response = requests.get(base_url, headers=headers, params=params, timeout=30)
|
| 406 |
-
print("[LOG] RapidAPI Response Status Code:", response.status_code)
|
| 407 |
-
print("[LOG] RapidAPI Response Body:", response.text)
|
| 408 |
|
| 409 |
-
|
| 410 |
-
raise ValueError(f"RapidAPI transcription error: {response.status_code}, {response.text}")
|
| 411 |
|
| 412 |
-
|
| 413 |
-
if not isinstance(data, list) or not data:
|
| 414 |
-
raise ValueError(f"Unexpected transcript format or empty transcript: {data}")
|
| 415 |
|
| 416 |
-
|
| 417 |
-
if not transcript_as_text:
|
| 418 |
-
raise ValueError("transcriptionAsText field is missing or empty.")
|
| 419 |
|
| 420 |
-
|
| 421 |
-
print(f"[DEBUG] Transcript Length: {len(transcript_as_text)} characters.")
|
| 422 |
-
snippet = transcript_as_text[:200] + "..." if len(transcript_as_text) > 200 else transcript_as_text
|
| 423 |
-
print(f"[DEBUG] Transcript Snippet: {snippet}")
|
| 424 |
|
| 425 |
-
|
| 426 |
|
| 427 |
-
|
| 428 |
-
print("[ERROR] RapidAPI transcription error:", e)
|
| 429 |
-
raise ValueError(f"Error transcribing YouTube video via RapidAPI: {str(e)}")
|
| 430 |
|
| 431 |
-
|
| 432 |
-
"""
|
| 433 |
-
Calls Deepgram TTS with the text, returning a path to a temp MP3 file.
|
| 434 |
-
We also do some pre-processing for punctuation, abbreviations, numeric expansions,
|
| 435 |
-
plus emotive expressions (ha, sigh, etc.).
|
| 436 |
-
"""
|
| 437 |
-
try:
|
| 438 |
-
print(f"[LOG] Generating audio for speaker: {speaker}")
|
| 439 |
-
processed_text = _preprocess_text_for_tts(text, speaker)
|
| 440 |
|
| 441 |
-
|
| 442 |
-
params = {
|
| 443 |
-
"model": "aura-asteria-en", # female by default
|
| 444 |
-
}
|
| 445 |
-
if speaker == "John":
|
| 446 |
-
params["model"] = "aura-zeus-en"
|
| 447 |
|
| 448 |
-
|
| 449 |
-
"Accept": "audio/mpeg",
|
| 450 |
-
"Content-Type": "application/json",
|
| 451 |
-
"Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
|
| 452 |
-
}
|
| 453 |
-
body = {
|
| 454 |
-
"text": processed_text
|
| 455 |
-
}
|
| 456 |
|
| 457 |
-
|
| 458 |
-
if response.status_code != 200:
|
| 459 |
-
raise ValueError(f"Deepgram TTS error: {response.status_code}, {response.text}")
|
| 460 |
|
| 461 |
-
|
| 462 |
-
if 'audio/mpeg' not in content_type:
|
| 463 |
-
raise ValueError("Unexpected Content-Type from Deepgram.")
|
| 464 |
|
| 465 |
-
|
| 466 |
-
for chunk in response.iter_content(chunk_size=8192):
|
| 467 |
-
if chunk:
|
| 468 |
-
mp3_file.write(chunk)
|
| 469 |
-
mp3_path = mp3_file.name
|
| 470 |
|
| 471 |
-
|
| 472 |
-
audio_seg = AudioSegment.from_file(mp3_path, format="mp3")
|
| 473 |
-
audio_seg = effects.normalize(audio_seg)
|
| 474 |
|
| 475 |
-
|
| 476 |
-
audio_seg.export(final_mp3_path, format="mp3")
|
| 477 |
|
| 478 |
-
|
| 479 |
-
os.remove(mp3_path)
|
| 480 |
|
| 481 |
-
|
| 482 |
-
except Exception as e:
|
| 483 |
-
print("[ERROR] Error generating audio:", e)
|
| 484 |
-
raise ValueError(f"Error generating audio: {str(e)}")
|
| 485 |
|
| 486 |
-
|
| 487 |
-
pass
|
| 488 |
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
1) "SaaS" => "sass"
|
| 492 |
-
2) Insert periods for uppercase abbreviations -> remove for TTS
|
| 493 |
-
3) Preserve numbers for natural TTS pronunciation
|
| 494 |
-
4) Expand leftover all-caps
|
| 495 |
-
5) Emotive placeholders for 'ha', 'haha', 'sigh', 'groan', etc.
|
| 496 |
-
6) If speaker != Jane, insert filler words
|
| 497 |
-
7) Remove random fillers
|
| 498 |
-
8) Capitalize sentence starts
|
| 499 |
-
"""
|
| 500 |
-
# 1) "SaaS" => "sass"
|
| 501 |
-
text = re.sub(r"\b(?i)SaaS\b", "sass", text)
|
| 502 |
-
|
| 503 |
-
# 2) Insert periods in uppercase abbreviations (>=2 chars), then remove them
|
| 504 |
-
def insert_periods_for_abbrev(m):
|
| 505 |
-
abbr = m.group(0)
|
| 506 |
-
parted = ".".join(list(abbr)) + "."
|
| 507 |
-
return parted
|
| 508 |
-
text = re.sub(r"\b([A-Z0-9]{2,})\b", insert_periods_for_abbrev, text)
|
| 509 |
-
text = re.sub(r"\.\.", ".", text)
|
| 510 |
-
def remove_periods_for_tts(m):
|
| 511 |
-
chunk = m.group(0)
|
| 512 |
-
return chunk.replace(".", " ").strip()
|
| 513 |
-
text = re.sub(r"[A-Z0-9]\.[A-Z0-9](?:\.[A-Z0-9])*\.", remove_periods_for_tts, text)
|
| 514 |
-
|
| 515 |
-
# 3) Preserve numbers by removing any digit-specific processing
|
| 516 |
-
# Let TTS handle natural number pronunciation
|
| 517 |
-
|
| 518 |
-
# 4) Hyphens -> spaces (but preserve hyphenated numbers)
|
| 519 |
-
text = re.sub(r"(?<!\d)-(?!\d)", " ", text)
|
| 520 |
-
|
| 521 |
-
# 5) Emotive placeholders
|
| 522 |
-
text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
|
| 523 |
-
text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
|
| 524 |
-
text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
|
| 525 |
-
|
| 526 |
-
# 6) Insert filler words if speaker != "Jane"
|
| 527 |
-
if speaker != "Jane":
|
| 528 |
-
def insert_thinking_pause(m):
|
| 529 |
-
word = m.group(1)
|
| 530 |
-
if random.random() < 0.3:
|
| 531 |
-
filler = random.choice(['hmm,', 'well,', 'let me see,'])
|
| 532 |
-
return f"{word}..., {filler}"
|
| 533 |
-
else:
|
| 534 |
-
return f"{word}...,"
|
| 535 |
-
keywords_pattern = r"\b(important|significant|crucial|point|topic)\b"
|
| 536 |
-
text = re.sub(keywords_pattern, insert_thinking_pause, text, flags=re.IGNORECASE)
|
| 537 |
-
|
| 538 |
-
conj_pattern = r"\b(and|but|so|because|however)\b"
|
| 539 |
-
text = re.sub(conj_pattern, lambda m: f"{m.group()}...", text, flags=re.IGNORECASE)
|
| 540 |
-
|
| 541 |
-
# 7) Remove random fillers
|
| 542 |
-
text = re.sub(r"\b(uh|um|ah)\b", "", text, flags=re.IGNORECASE)
|
| 543 |
-
|
| 544 |
-
# 8) Capitalize sentence starts
|
| 545 |
-
def capitalize_match(m):
|
| 546 |
-
return m.group().upper()
|
| 547 |
-
text = re.sub(r'(^\s*\w)|([.!?]\s*\w)', capitalize_match, text)
|
| 548 |
-
|
| 549 |
-
return text.strip()
|
| 550 |
-
|
| 551 |
-
def _spell_digits(d: str) -> str:
|
| 552 |
-
"""
|
| 553 |
-
Convert individual digits '3' -> 'three'.
|
| 554 |
-
"""
|
| 555 |
-
digit_map = {
|
| 556 |
-
'0': 'zero',
|
| 557 |
-
'1': 'one',
|
| 558 |
-
'2': 'two',
|
| 559 |
-
'3': 'three',
|
| 560 |
-
'4': 'four',
|
| 561 |
-
'5': 'five',
|
| 562 |
-
'6': 'six',
|
| 563 |
-
'7': 'seven',
|
| 564 |
-
'8': 'eight',
|
| 565 |
-
'9': 'nine'
|
| 566 |
-
}
|
| 567 |
-
return " ".join(digit_map[ch] for ch in d if ch in digit_map)
|
| 568 |
|
| 569 |
-
def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegment:
|
| 570 |
-
"""
|
| 571 |
-
Mixes 'spoken' with a default bg_music.mp3 or user-provided custom music:
|
| 572 |
-
1) Start with 2 seconds of music alone before speech begins.
|
| 573 |
-
2) Loop the music if it's shorter than the final audio length.
|
| 574 |
-
3) Lower music volume so the speech is clear.
|
| 575 |
-
"""
|
| 576 |
-
if custom_music_path:
|
| 577 |
-
music_path = custom_music_path
|
| 578 |
-
else:
|
| 579 |
-
music_path = "bg_music.mp3"
|
| 580 |
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
except Exception as e:
|
| 584 |
-
print("[ERROR] Failed to load background music:", e)
|
| 585 |
-
return spoken
|
| 586 |
|
| 587 |
-
|
| 588 |
|
| 589 |
-
|
| 590 |
-
looped_music = AudioSegment.empty()
|
| 591 |
-
while len(looped_music) < total_length_ms:
|
| 592 |
-
looped_music += bg_music
|
| 593 |
|
| 594 |
-
|
| 595 |
-
final_mix = looped_music.overlay(spoken, position=2000)
|
| 596 |
-
return final_mix
|
| 597 |
|
| 598 |
-
|
| 599 |
-
def call_groq_api_for_qa(system_prompt: str) -> str:
|
| 600 |
-
"""
|
| 601 |
-
A minimal placeholder for your short Q&A LLM call.
|
| 602 |
-
Must return a JSON string, e.g.:
|
| 603 |
-
{"speaker": "John", "text": "Short answer here"}
|
| 604 |
-
"""
|
| 605 |
-
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
| 606 |
-
try:
|
| 607 |
-
response = groq_client.chat.completions.create(
|
| 608 |
-
messages=[{"role": "system", "content": system_prompt}],
|
| 609 |
-
model="llama-3.3-70b-versatile",
|
| 610 |
-
max_tokens=512,
|
| 611 |
-
temperature=0.7
|
| 612 |
-
)
|
| 613 |
-
except Exception as e:
|
| 614 |
-
print("[ERROR] Groq API error:", e)
|
| 615 |
-
fallback = {"speaker": "John", "text": "I'm sorry, I'm having trouble answering right now."}
|
| 616 |
-
return json.dumps(fallback)
|
| 617 |
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import re
|
| 3 |
import json
|
|
|
|
| 13 |
from groq import Groq
|
| 14 |
import numpy as np
|
| 15 |
import torch
|
|
|
|
| 16 |
|
| 17 |
class DialogueItem(BaseModel):
|
| 18 |
+
speaker: Literal["Jane", "John"] # TTS voice
|
| 19 |
+
display_speaker: str = "Jane" # For display in transcript
|
| 20 |
text: str
|
| 21 |
|
| 22 |
class Dialogue(BaseModel):
|
|
|
|
| 44 |
|
| 45 |
def extract_text_from_url(url):
|
| 46 |
"""
|
| 47 |
+
Fetches and extracts readable text from a given URL (stripping out scripts, styles, etc.).
|
|
|
|
| 48 |
"""
|
| 49 |
print("[LOG] Extracting text from URL:", url)
|
| 50 |
try:
|
|
|
|
| 81 |
|
| 82 |
def is_sufficient(text: str, min_word_count: int = 500) -> bool:
|
| 83 |
"""
|
| 84 |
+
Checks if the fetched text meets our sufficiency criteria (e.g., at least 500 words).
|
|
|
|
| 85 |
"""
|
| 86 |
word_count = len(text.split())
|
| 87 |
print(f"[DEBUG] Aggregated word count: {word_count}")
|
|
|
|
| 93 |
Appends it to our aggregated info if found.
|
| 94 |
"""
|
| 95 |
print("[LOG] Querying LLM for additional information.")
|
| 96 |
+
|
| 97 |
system_prompt = (
|
| 98 |
"You are an AI assistant with extensive knowledge up to 2023-10. "
|
| 99 |
"Provide additional relevant information on the following topic based on your knowledge base.\n\n"
|
|
|
|
| 101 |
f"Existing Information: {existing_text}\n\n"
|
| 102 |
"Please add more insightful details, facts, and perspectives to enhance the understanding of the topic."
|
| 103 |
)
|
| 104 |
+
|
| 105 |
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
| 106 |
+
|
| 107 |
try:
|
| 108 |
response = groq_client.chat.completions.create(
|
| 109 |
messages=[{"role": "system", "content": system_prompt}],
|
|
|
|
| 111 |
max_tokens=1024,
|
| 112 |
temperature=0.7
|
| 113 |
)
|
| 114 |
+
|
| 115 |
+
additional_info = response.choices[0].message.content.strip()
|
| 116 |
+
print("[DEBUG] Additional information from LLM:")
|
| 117 |
+
print(additional_info)
|
| 118 |
+
return additional_info
|
| 119 |
+
|
| 120 |
except Exception as e:
|
| 121 |
print("[ERROR] Groq API error during fallback:", e)
|
| 122 |
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
def research_topic(topic: str) -> str:
|
| 125 |
"""
|
| 126 |
Gathers info from various RSS feeds and Wikipedia. If needed, queries the LLM
|
| 127 |
for more data if the aggregated text is insufficient.
|
| 128 |
"""
|
| 129 |
+
|
| 130 |
sources = {
|
| 131 |
"BBC": "https://feeds.bbci.co.uk/news/rss.xml",
|
| 132 |
"CNN": "http://rss.cnn.com/rss/edition.rss",
|
|
|
|
| 138 |
"Google News - Custom": f"https://news.google.com/rss/search?q={requests.utils.quote(topic)}&hl=en-IN&gl=IN&ceid=IN:en",
|
| 139 |
}
|
| 140 |
|
| 141 |
+
summary_parts = [] # Wikipedia summary
|
| 142 |
+
wiki_summary = fetch_wikipedia_summary(topic)
|
| 143 |
+
|
| 144 |
+
if wiki_summary:
|
| 145 |
+
summary_parts.append(f"From Wikipedia: {wiki_summary}")
|
| 146 |
+
|
| 147 |
+
# For each RSS feed
|
| 148 |
+
for name, feed_url in sources.items():
|
| 149 |
+
try:
|
| 150 |
+
items = fetch_rss_feed(feed_url)
|
| 151 |
+
if not items:
|
| 152 |
+
continue
|
| 153 |
+
|
| 154 |
+
title, desc, link = find_relevant_article(items, topic, min_match=2)
|
| 155 |
+
|
| 156 |
+
if link:
|
| 157 |
+
article_text = fetch_article_text(link)
|
| 158 |
+
if article_text:
|
| 159 |
+
summary_parts.append(f"From {name}: {article_text}")
|
| 160 |
+
else:
|
| 161 |
+
summary_parts.append(f"From {name}: {title} - {desc}")
|
| 162 |
+
|
| 163 |
+
except Exception as e:
|
| 164 |
+
print(f"[ERROR] Error fetching from {name} RSS feed:", e)
|
| 165 |
+
continue
|
| 166 |
+
|
| 167 |
+
aggregated_info = " ".join(summary_parts)
|
| 168 |
+
|
| 169 |
+
print("[DEBUG] Aggregated info from primary sources:")
|
| 170 |
+
print(aggregated_info)
|
| 171 |
+
|
| 172 |
+
# If not enough data, fallback to LLM
|
| 173 |
+
if not is_sufficient(aggregated_info):
|
| 174 |
+
print("[LOG] Insufficient info from primary sources. Fallback to LLM.")
|
| 175 |
+
additional_info = query_llm_for_additional_info(topic, aggregated_info)
|
| 176 |
+
|
| 177 |
+
if additional_info:
|
| 178 |
+
aggregated_info += " " + additional_info
|
| 179 |
+
else:
|
| 180 |
+
print("[ERROR] Failed to retrieve additional info from LLM.")
|
| 181 |
+
|
| 182 |
+
if not aggregated_info:
|
| 183 |
+
return f"Sorry, I couldn't find recent information on '{topic}'."
|
| 184 |
+
|
| 185 |
+
return aggregated_info
|
| 186 |
|
| 187 |
def fetch_wikipedia_summary(topic: str) -> str:
|
| 188 |
+
"""
|
| 189 |
+
Fetch a quick Wikipedia summary of the topic via the official Wikipedia API.
|
| 190 |
+
"""
|
| 191 |
+
print("[LOG] Fetching Wikipedia summary for:", topic)
|
| 192 |
+
|
| 193 |
+
try:
|
| 194 |
+
search_url = (
|
| 195 |
+
f"https://en.wikipedia.org/w/api.php?action=opensearch&search={requests.utils.quote(topic)}"
|
| 196 |
+
"&limit=1&namespace=0&format=json"
|
| 197 |
+
)
|
| 198 |
+
|
| 199 |
+
resp = requests.get(search_url)
|
| 200 |
+
if resp.status_code != 200:
|
| 201 |
+
print(f"[ERROR] Failed to fetch Wikipedia search results for {topic}")
|
| 202 |
+
return ""
|
| 203 |
+
|
| 204 |
+
data = resp.json()
|
| 205 |
+
if len(data) > 1 and data[1]:
|
| 206 |
+
title = data[1][0]
|
| 207 |
+
summary_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{requests.utils.quote(title)}"
|
| 208 |
+
s_resp = requests.get(summary_url)
|
| 209 |
+
|
| 210 |
+
if s_resp.status_code == 200:
|
| 211 |
+
s_data = s_resp.json()
|
| 212 |
+
if "extract" in s_data:
|
| 213 |
+
print("[LOG] Wikipedia summary fetched successfully.")
|
| 214 |
+
return s_data["extract"]
|
| 215 |
+
return ""
|
| 216 |
+
|
| 217 |
+
except Exception as e:
|
| 218 |
+
print(f"[ERROR] Exception during Wikipedia summary fetch: {e}")
|
| 219 |
+
return ""
|
| 220 |
|
| 221 |
def fetch_rss_feed(feed_url: str) -> list:
|
| 222 |
+
"""
|
| 223 |
+
Pulls RSS feed data from a given URL and returns items.
|
| 224 |
+
"""
|
| 225 |
+
print("[LOG] Fetching RSS feed:", feed_url)
|
| 226 |
+
|
| 227 |
+
try:
|
| 228 |
+
resp = requests.get(feed_url)
|
| 229 |
+
if resp.status_code != 200:
|
| 230 |
+
print(f"[ERROR] Failed to fetch RSS feed: {feed_url}")
|
| 231 |
+
return []
|
| 232 |
+
|
| 233 |
+
soup = BeautifulSoup(resp.content, "xml")
|
| 234 |
+
items = soup.find_all("item")
|
| 235 |
+
return items
|
| 236 |
+
|
| 237 |
+
except Exception as e:
|
| 238 |
+
print(f"[ERROR] Exception fetching RSS feed {feed_url}: {e}")
|
| 239 |
+
return []
|
| 240 |
|
| 241 |
def find_relevant_article(items, topic: str, min_match=2) -> tuple:
|
| 242 |
+
"""
|
| 243 |
+
Check each article in the RSS feed for mention of the topic by counting
|
| 244 |
+
the number of keyword matches.
|
| 245 |
+
"""
|
| 246 |
+
print("[LOG] Finding relevant articles...")
|
| 247 |
+
|
| 248 |
+
keywords = re.findall(r'\w+', topic.lower())
|
| 249 |
+
|
| 250 |
+
for item in items:
|
| 251 |
+
title = item.find("title").get_text().strip() if item.find("title") else ""
|
| 252 |
+
description = item.find("description").get_text().strip() if item.find("description") else ""
|
| 253 |
+
|
| 254 |
+
text = (title + " " + description).lower()
|
| 255 |
+
|
| 256 |
+
matches = sum(1 for kw in keywords if kw in text)
|
| 257 |
+
|
| 258 |
+
if matches >= min_match:
|
| 259 |
+
link = item.find("link").get_text().strip() if item.find("link") else ""
|
| 260 |
+
print(f"[LOG] Relevant article found: {title}")
|
| 261 |
+
return title, description, link
|
| 262 |
+
|
| 263 |
+
return None, None, None
|
| 264 |
|
| 265 |
def fetch_article_text(link: str) -> str:
|
| 266 |
+
"""
|
| 267 |
+
Fetch the article text from the given link (first 5 paragraphs).
|
| 268 |
+
"""
|
| 269 |
+
print("[LOG] Fetching article text from:", link)
|
| 270 |
+
|
| 271 |
+
if not link:
|
| 272 |
+
print("[LOG] No link provided for article text.")
|
| 273 |
+
return ""
|
| 274 |
+
|
| 275 |
+
try:
|
| 276 |
+
resp = requests.get(link)
|
| 277 |
+
|
| 278 |
+
if resp.status_code != 200:
|
| 279 |
+
print(f"[ERROR] Failed to fetch article from {link}")
|
| 280 |
+
return ""
|
| 281 |
+
|
| 282 |
+
soup = BeautifulSoup(resp.text, 'html.parser')
|
| 283 |
+
|
| 284 |
+
paragraphs = soup.find_all("p")
|
| 285 |
+
|
| 286 |
+
text = " ".join(p.get_text() for p in paragraphs[:5]) # first 5 paragraphs
|
| 287 |
+
|
| 288 |
+
print("[LOG] Article text fetched successfully.")
|
| 289 |
+
|
| 290 |
+
return text.strip()
|
| 291 |
+
|
| 292 |
+
except Exception as e:
|
| 293 |
+
print(f"[ERROR] Error fetching article text: {e}")
|
| 294 |
+
return ""
|
| 295 |
|
| 296 |
def generate_script(
    system_prompt: str,
    input_text: str,
    tone: str,
    target_length: str,
    host_name: str = "Jane",
    guest_name: str = "John",
    sponsor_style: str = "Separate Break",
    sponsor_provided=None  # Accept sponsor_provided parameter
):
    """
    Generate a two-speaker podcast dialogue via the Groq chat API.

    Builds a prompt from the system prompt, tone, target length, input text
    and optional sponsor instructions, asks the model for strict-JSON output,
    then normalizes speaker names onto the internal "Jane"/"John" voices
    (keeping the user-facing names in ``display_speaker``).

    Raises:
        ValueError: on Groq API failure or when the response contains no
            parseable JSON dialogue.
    """
    print("[LOG] Generating script with tone:", tone, "and length:", target_length)
    groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

    # Derive a word budget from the requested duration (e.g. "5 minutes").
    words_per_minute = 150
    numeric_minutes = 3  # fallback when no number is found in target_length
    match = re.search(r"(\d+)", target_length)
    if match:
        numeric_minutes = int(match.group(1))
    min_words = max(50, numeric_minutes * 100)
    max_words = numeric_minutes * words_per_minute

    # Tone mapping dictionary.
    # BUG FIX: the original literals had mismatched quotes (syntax errors);
    # reconstructed the intended descriptions.
    tone_map = {
        "Humorous": "funny and exciting,makes people chuckle",
        "Formal": "business-like,well-structured,professional",
        "Casual": "like a conversation between close friends,relaxed and informal",
        "Youthful": "like how teenagers might chat,energetic and lively"
    }
    chosen_tone = tone_map.get(tone, "casual")

    # Determine sponsor instructions based on sponsor_provided and sponsor_style.
    # BUG FIX: both instruction strings had broken quoting (syntax errors).
    if sponsor_provided:
        if sponsor_style == "Separate Break":
            sponsor_instructions = (
                "If sponsor content is provided, include it in a separate ad break (~30 seconds). "
                "Use phrasing like 'Now a word from our sponsor...' and end with 'Back to the show' or similar."
            )
        else:
            sponsor_instructions = (
                "If sponsor content is provided, blend it naturally (~30 seconds) into the conversation. "
                "Avoid abrupt transitions."
            )
    else:
        sponsor_instructions = ""  # No sponsor instructions if sponsor_provided is empty

    prompt = (
        f"{system_prompt}\n"
        f"TONE:{chosen_tone}\n"
        f"TARGET LENGTH:{target_length} (~{min_words}-{max_words} words)\n"
        f"INPUT TEXT:{input_text}\n\n"
        f"# Sponsor Style Instruction:\n{sponsor_instructions}\n\n"
        "Please provide the output in the following JSON format without any additional text:\n\n"
        "{\n"
        '  "dialogue":[\n'
        '    {\n'
        '      "speaker":"Jane",\n'
        '      "text":"..."\n'
        '    },\n'
        '    {\n'
        '      "speaker":"John",\n'
        '      "text":"..."\n'
        '    }\n'
        "  ]\n"
        "}"
    )
    print("[LOG] Sending prompt to Groq:")
    print(prompt)

    try:
        # BUG FIX: the original call was missing its closing parenthesis.
        response = groq_client.chat.completions.create(
            messages=[{"role": "system", "content": prompt}],
            model="llama-3.3-70b-versatile",
            max_tokens=2048,
            temperature=0.7
        )
    except Exception as e:
        print("[ERROR] Groq API error:", e)
        raise ValueError(f"Error communicating with Groq API:{str(e)}")

    # The model may wrap the JSON in extra prose; slice from the first '{'
    # to the last '}' before parsing.
    raw_content = response.choices[0].message.content.strip()
    start_index = raw_content.find('{')
    end_index = raw_content.rfind('}')
    if start_index == -1 or end_index == -1:
        raise ValueError("Failed to parse dialogue:No JSON found.")
    json_str = raw_content[start_index:end_index + 1].strip()

    try:
        data = json.loads(json_str)
        dialogue_list = data.get("dialogue", [])
        # Map whatever speaker names the model produced onto the two
        # internal TTS voices, preserving the display name for the UI.
        for d in dialogue_list:
            raw_speaker = d.get("speaker", "Jane")
            if raw_speaker.lower() == host_name.lower():
                d["speaker"] = "Jane"
                d["display_speaker"] = host_name
            elif raw_speaker.lower() == guest_name.lower():
                d["speaker"] = "John"
                d["display_speaker"] = guest_name
            else:
                # Unknown speaker: default to the host voice but keep the
                # model's label for display.
                d["speaker"] = "Jane"
                d["display_speaker"] = raw_speaker
        new_dialogue_items = []
        for d in dialogue_list:
            # BUG FIX: the original used curly "smart quotes" around the
            # key name, which is a syntax error in Python.
            if "display_speaker" not in d:
                d["display_speaker"] = d["speaker"]
            new_dialogue_items.append(DialogueItem(**d))
        return Dialogue(dialogue=new_dialogue_items)
    except json.JSONDecodeError as e:
        print("[ERROR] JSON decoding(format) failed:", e)
        raise ValueError(f"Failed to parse dialogue:{str(e)}")
    except Exception as e:
        print("[ERROR] JSON decoding failed:", e)
        raise ValueError(f"Failed to parse dialogue:{str(e)}")
+
def transcribe_youtube_video(video_url: str) -> str:
    """
    Transcribe a YouTube video via the RapidAPI youtube-transcriptor service.

    Extracts the 11-character video ID from the URL, calls the transcript
    endpoint, and returns the ``transcriptionAsText`` field of the first
    result.

    Raises:
        ValueError: if the URL has no extractable video ID, the API call
            fails, or the response lacks a usable transcript.
    """
    print("[LOG] Transcribing YouTube video via RapidAPI:", video_url)
    video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", video_url)
    if not video_id_match:
        raise ValueError(f"Invalid YouTube URL:{video_url},cannot extract video ID.")
    video_id = video_id_match.group(1)
    print("[LOG] Extracted video ID:", video_id)

    base_url = "https://youtube-transcriptor.p.rapidapi.com/transcript"
    params = {
        "video_id": video_id,
        "lang": "en"
    }
    headers = {
        "x-rapidapi-host": "youtube-transcriptor.p.rapidapi.com",
        "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY")
    }
    try:
        # BUG FIX: the keyword was misspelled 'timeouot', which made
        # requests.get raise TypeError on every call.
        response = requests.get(base_url, headers=headers, params=params, timeout=30)
        print("[LOG] RapidAPI Response Status Code:", response.status_code)
        print("[LOG] RapidAPI Response Body:", response.text)
        if response.status_code != 200:
            raise ValueError(f"RapidAPI transcription error:{response.status_code},{response.text}")
        data = response.json()
        # The API returns a list; an empty or non-list payload is an error.
        if not isinstance(data, list) or not data:
            raise ValueError(f"Unexpected transcript format or empty transcript:{data}")
        transcript_as_text = data[0].get('transcriptionAsText', '').strip()
        if not transcript_as_text:
            raise ValueError("transcriptionAsText field is missing or empty.")
        print("[LOG] Transcript retrieval successful.")
        print(f"[DEBUG] Transcript Length:{len(transcript_as_text)} characters.")
        snippet = transcript_as_text[:200] + "..." if len(transcript_as_text) > 200 else transcript_as_text
        print(f"[DEBUG] Transcript Snippet:{snippet}")
        return transcript_as_text
    except Exception as e:
        print("[ERROR] RapidAPI transcription error:", e)
        raise ValueError(f"Error transcribing YouTube video via RapidAPI:{str(e)}")
def generate_audio_mp3(text:str,speaker:str)->str:
|
| 568 |
+
"""
|
| 569 |
+
Calls Deepgram TTS with the text returning a path to a temp MP3 file.
|
| 570 |
+
We also do some pre-processing for punctuation abbreviations,
|
| 571 |
+
numeric expansions plus emotive expressions (ha sigh etc.).
|
| 572 |
+
"""
|
| 573 |
+
try:
|
| 574 |
+
|
| 575 |
+
print(f"[LOG] Generating audio for speaker:{speaker}")
|
| 576 |
+
|
| 577 |
+
processed_text=_preprocess_text_for_tts(text,speaker)
|
| 578 |
+
|
| 579 |
+
deepgram_api_url="https://api.deepgram.com/v1/speak"
|
| 580 |
+
|
| 581 |
+
params={
|
| 582 |
+
|
| 583 |
+
"model":"aura-asteria-en", # female by default
|
| 584 |
+
}
|
| 585 |
+
|
| 586 |
+
if speaker=="John":
|
| 587 |
+
params["model"]="aura-zeus-en"
|
| 588 |
+
|
| 589 |
+
headers={
|
| 590 |
+
|
| 591 |
+
"Accept":"audio/mpeg",
|
| 592 |
+
|
| 593 |
+
"Content-Type":"application/json",
|
| 594 |
+
|
| 595 |
+
"Authorization":f"Token{os.environ.get('DEEPGRAM_API_KEY')}"
|
| 596 |
+
}
|
| 597 |
+
|
| 598 |
+
body={
|
| 599 |
+
"text":processed_text
|
| 600 |
+
}
|
| 601 |
+
|
| 602 |
+
response=requests.post(deepgram_api_url,param=params ,headers=headers,json=body ,stream=True)
|
| 603 |
+
|
| 604 |
+
|
| 605 |
+
if response.status_code!=200:
|
| 606 |
+
|
| 607 |
+
raise ValueError(f"Deepgram TTS error:{response.status_code},{response.text}")
|
| 608 |
+
|
| 609 |
+
content_type=response.headers
|