jellecali8 committed on
Commit
3f30f02
·
verified ·
1 Parent(s): 0adbc6d
Files changed (1) hide show
  1. app.py +14 -40
app.py CHANGED
@@ -1,67 +1,41 @@
1
- import gradio as gr
 
2
  import torch
3
  import soundfile as sf
4
- import tempfile
5
- import os
6
- from transformers import AutoTokenizer, VitsModel
7
- import numpy as np
8
-
# Hugging Face repository that hosts the model; change it to your own
# username/repo.
repo_id = "jellecali8/Somali_tts_model"

# Load the tokenizer and the model, then put the model in evaluation mode.
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = VitsModel.from_pretrained(repo_id)
model.eval()

# Load the custom speaker embedding from a .npy file (the file has to be added
# to the Space folder) and give it a leading batch dimension.
try:
    custom_embedding_np = np.load("somali_speaker_embedding.npy")
    custom_embedding = torch.tensor(custom_embedding_np, dtype=torch.float32).unsqueeze(0)
except Exception as exc:
    # Best-effort fallback, for testing only: use a random tensor when the
    # embedding cannot be loaded. Report it, so that a missing or corrupt file
    # does not silently produce a random voice.
    print(
        "Warning: could not load somali_speaker_embedding.npy "
        f"({exc}); falling back to a random speaker embedding"
    )
    custom_embedding = torch.randn(1, 256)
24
 
def tts(text):
    """Synthesize speech for ``text`` and return the path of a WAV file.

    Relies on the module-level names ``model``, ``tokenizer`` and
    ``custom_embedding``.

    Args:
        text: The text to synthesize. ``None``, empty and whitespace-only
            input is ignored.

    Returns:
        The path of the generated ``.wav`` file on success; ``None`` when the
        input is blank, the waveform is all zeros, or the written file is
        empty; or a string of the form ``"Error: <message>"`` when any step
        raises.
    """
    try:
        # Nothing to synthesize for missing or blank input.
        if text is None or not text.strip():
            return None

        # Move the inputs and the embedding to the device the model lives on
        # (CPU or GPU).
        device = next(model.parameters()).device
        inputs = tokenizer(text, return_tensors="pt").to(device)
        custom_embedding_ = custom_embedding.to(device)
        with torch.no_grad():
            # NOTE(review): confirm that the model's forward() accepts
            # `speaker_embeddings`; if it does not, the resulting TypeError is
            # reported through the "Error: ..." return below.
            outputs = model(**inputs, speaker_embeddings=custom_embedding_)
        waveform = outputs.waveform.squeeze().cpu().numpy()

        # Peak-normalize the waveform into the range -1.0 .. 1.0.
        max_val = max(abs(waveform.max()), abs(waveform.min()))
        if max_val > 0:
            waveform = waveform / max_val
        else:
            # An all-zero waveform cannot be normalized and carries no audio.
            print("Warning: Waveform is all zeros")
            return None

        # Reserve a unique .wav path and close the handle immediately, so the
        # descriptor is not leaked and the file can be reopened for writing.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            wav_path = tmp.name

        # Write at the rate the model declares; fall back to 16 kHz when the
        # config does not provide one.
        sample_rate = getattr(getattr(model, "config", None), "sampling_rate", None) or 16000
        try:
            sf.write(wav_path, waveform, sample_rate)
            wav_size = os.path.getsize(wav_path)
        except Exception:
            # Do not leave a half-written temporary file behind.
            os.remove(wav_path)
            raise

        # Check the file size to make sure something was actually written.
        if wav_size == 0:
            print("Warning: Generated WAV file is empty")
            os.remove(wav_path)
            return None

        return wav_path
    except Exception as e:
        # NOTE(review): the error is returned as a plain string, so a caller
        # that expects a file path has to check for the "Error: " prefix.
        return f"Error: {str(e)}"
58
-
# Gradio UI: a two-line text box goes in, the generated audio file comes out.
demo = gr.Interface(
    tts,
    gr.Textbox(lines=2, placeholder="Gali qoraalkaaga halkan..."),
    gr.Audio(type="filepath"),
    title="Somali TTS with VITS",
    description="Qor qoraal Soomaali ah, riix 'Submit', oo dhageyso codka Somali native.",
)

# Start the web app.
demo.launch()
 
1
+ import os
2
+ import tempfile
3
  import torch
4
  import soundfile as sf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
def tts(text):
    """Synthesize speech for ``text`` and return the path of a WAV file.

    Relies on the module-level names ``model``, ``tokenizer`` and
    ``custom_embedding``. Diagnostic information about the waveform and the
    generated file is printed to standard output.

    Args:
        text: The text to synthesize. ``None``, empty and whitespace-only
            input is ignored.

    Returns:
        The path of the generated ``.wav`` file on success; ``None`` when the
        input is blank, the waveform is all zeros, or the written file is
        empty; or a string of the form ``"Error: <message>"`` when any step
        raises.
    """
    try:
        # Nothing to synthesize for missing or blank input.
        if text is None or not text.strip():
            return None

        # Run the model on whichever device its parameters live on.
        device = next(model.parameters()).device
        inputs = tokenizer(text, return_tensors="pt").to(device)
        custom_embedding_ = custom_embedding.to(device)
        with torch.no_grad():
            # NOTE(review): confirm that the model's forward() accepts
            # `speaker_embeddings`; if it does not, the resulting TypeError is
            # reported through the "Error: ..." return below.
            outputs = model(**inputs, speaker_embeddings=custom_embedding_)
        waveform = outputs.waveform.squeeze().cpu().numpy()

        # Inspect the raw waveform: shape, min, max.
        print(f"Waveform shape: {waveform.shape}, min: {waveform.min()}, max: {waveform.max()}")

        # Peak-normalize the waveform into the range -1.0 .. 1.0.
        max_val = max(abs(waveform.max()), abs(waveform.min()))
        if max_val > 0:
            waveform = waveform / max_val
        else:
            # An all-zero waveform cannot be normalized and carries no audio.
            print("Warning: Waveform is all zeros")
            return None

        # Reserve a unique .wav path and close the handle immediately, so the
        # descriptor is not leaked and the file can be reopened for writing.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            wav_path = tmp.name

        # Write at the rate the model declares; fall back to 16 kHz when the
        # config does not provide one.
        sample_rate = getattr(getattr(model, "config", None), "sampling_rate", None) or 16000
        try:
            sf.write(wav_path, waveform, sample_rate)
            wav_size = os.path.getsize(wav_path)
        except Exception:
            # Do not leave a half-written temporary file behind.
            os.remove(wav_path)
            raise

        # Inspect the generated file: path and size.
        print(f"Generated WAV file path: {wav_path}")
        print(f"WAV file size: {wav_size} bytes")

        if wav_size == 0:
            print("Warning: Generated WAV file is empty")
            os.remove(wav_path)
            return None

        return wav_path
    except Exception as e:
        # NOTE(review): the error is returned as a plain string, so a caller
        # that expects a file path has to check for the "Error: " prefix.
        return f"Error: {str(e)}"