siddhartharyaai committed on
Commit
5006b54
·
verified ·
1 Parent(s): 4e81954

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +3 -6
utils.py CHANGED
@@ -494,21 +494,20 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
494
  # 1) "SaaS" => "sass"
495
  text = re.sub(r"\b(?i)SaaS\b", "sass", text)
496
 
497
- # 2) Insert periods for uppercase abbreviations (>=2 chars), then remove them
498
  def insert_periods_for_abbrev(m):
499
  abbr = m.group(0)
500
  return ".".join(list(abbr)) + "."
501
- text = re.sub(r"\b([A-Z0-9]{2,})\b", insert_periods_for_abbrev, text)
502
  text = re.sub(r"\.\.", ".", text)
503
  def remove_periods_for_tts(m):
504
  return m.group(0).replace(".", " ").strip()
505
- text = re.sub(r"[A-Z0-9]\.[A-Z0-9](?:\.[A-Z0-9])*\.", remove_periods_for_tts, text)
506
 
507
  # 3) Replace hyphens with spaces
508
  text = re.sub(r"-", " ", text)
509
 
510
  # Removed numeric conversions to let TTS handle numbers naturally.
511
- # No regex or num2words conversion for numbers here.
512
 
513
  # 6) Emotive placeholders
514
  text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
@@ -540,7 +539,6 @@ def _preprocess_text_for_tts(text: str, speaker: str) -> str:
540
 
541
  return text.strip()
542
 
543
-
544
  def _spell_digits(d: str) -> str:
545
  """
546
  Convert individual digits '3' -> 'three'.
@@ -610,4 +608,3 @@ def call_groq_api_for_qa(system_prompt: str) -> str:
610
 
611
  raw_content = response.choices[0].message.content.strip()
612
  return raw_content
613
-
 
494
  # 1) "SaaS" => "sass"
495
  text = re.sub(r"\b(?i)SaaS\b", "sass", text)
496
 
497
+ # 2) Insert periods in uppercase abbreviations (letters only), then remove them
498
  def insert_periods_for_abbrev(m):
499
  abbr = m.group(0)
500
  return ".".join(list(abbr)) + "."
501
+ text = re.sub(r"\b([A-Z]{2,})\b", insert_periods_for_abbrev, text)
502
  text = re.sub(r"\.\.", ".", text)
503
  def remove_periods_for_tts(m):
504
  return m.group(0).replace(".", " ").strip()
505
+ text = re.sub(r"[A-Z]\.[A-Z](?:\.[A-Z])*\.", remove_periods_for_tts, text)
506
 
507
  # 3) Replace hyphens with spaces
508
  text = re.sub(r"-", " ", text)
509
 
510
  # Removed numeric conversions to let TTS handle numbers naturally.
 
511
 
512
  # 6) Emotive placeholders
513
  text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
 
539
 
540
  return text.strip()
541
 
 
542
  def _spell_digits(d: str) -> str:
543
  """
544
  Convert individual digits '3' -> 'three'.
 
608
 
609
  raw_content = response.choices[0].message.content.strip()
610
  return raw_content