Spaces:
Runtime error
Runtime error
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -494,21 +494,20 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
|
|
| 494 |
# 1) "SaaS" => "sass"
|
| 495 |
text = re.sub(r"\b(?i)SaaS\b", "sass", text)
|
| 496 |
|
| 497 |
-
# 2) Insert periods
|
| 498 |
def insert_periods_for_abbrev(m):
|
| 499 |
abbr = m.group(0)
|
| 500 |
return ".".join(list(abbr)) + "."
|
| 501 |
-
text = re.sub(r"\b([A-
|
| 502 |
text = re.sub(r"\.\.", ".", text)
|
| 503 |
def remove_periods_for_tts(m):
|
| 504 |
return m.group(0).replace(".", " ").strip()
|
| 505 |
-
text = re.sub(r"[A-
|
| 506 |
|
| 507 |
# 3) Replace hyphens with spaces
|
| 508 |
text = re.sub(r"-", " ", text)
|
| 509 |
|
| 510 |
# Removed numeric conversions to let TTS handle numbers naturally.
|
| 511 |
-
# No regex or num2words conversion for numbers here.
|
| 512 |
|
| 513 |
# 6) Emotive placeholders
|
| 514 |
text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
|
|
@@ -540,7 +539,6 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
|
|
| 540 |
|
| 541 |
return text.strip()
|
| 542 |
|
| 543 |
-
|
| 544 |
def _spell_digits(d: str) -> str:
|
| 545 |
"""
|
| 546 |
Convert individual digits '3' -> 'three'.
|
|
@@ -610,4 +608,3 @@ def call_groq_api_for_qa(system_prompt: str) -> str:
|
|
| 610 |
|
| 611 |
raw_content = response.choices[0].message.content.strip()
|
| 612 |
return raw_content
|
| 613 |
-
|
|
|
|
| 494 |
# 1) "SaaS" => "sass"
|
| 495 |
text = re.sub(r"\b(?i)SaaS\b", "sass", text)
|
| 496 |
|
| 497 |
+
# 2) Spell out uppercase abbreviations (letters only): insert periods between the letters, then replace those periods with spaces (e.g. "NASA" -> "N.A.S.A." -> "N A S A")
|
| 498 |
def insert_periods_for_abbrev(m):
|
| 499 |
abbr = m.group(0)
|
| 500 |
return ".".join(list(abbr)) + "."
|
| 501 |
+
text = re.sub(r"\b([A-Z]{2,})\b", insert_periods_for_abbrev, text)
|
| 502 |
text = re.sub(r"\.\.", ".", text)
|
| 503 |
def remove_periods_for_tts(m):
|
| 504 |
return m.group(0).replace(".", " ").strip()
|
| 505 |
+
text = re.sub(r"[A-Z]\.[A-Z](?:\.[A-Z])*\.", remove_periods_for_tts, text)
|
| 506 |
|
| 507 |
# 3) Replace hyphens with spaces
|
| 508 |
text = re.sub(r"-", " ", text)
|
| 509 |
|
| 510 |
# Removed numeric conversions to let TTS handle numbers naturally.
|
|
|
|
| 511 |
|
| 512 |
# 6) Emotive placeholders
|
| 513 |
text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
|
|
|
|
| 539 |
|
| 540 |
return text.strip()
|
| 541 |
|
|
|
|
| 542 |
def _spell_digits(d: str) -> str:
|
| 543 |
"""
|
| 544 |
Convert individual digits '3' -> 'three'.
|
|
|
|
| 608 |
|
| 609 |
raw_content = response.choices[0].message.content.strip()
|
| 610 |
return raw_content
|
|
|