jellecali8 committed on
Commit
046cd50
·
verified ·
1 Parent(s): 3f30f02

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -37
app.py CHANGED
@@ -1,41 +1,31 @@
1
- import os
2
- import tempfile
3
  import torch
4
  import soundfile as sf
 
 
 
 
 
 
5
 
def tts(text):
    """Synthesize speech for ``text`` and return the path of a WAV file.

    Relies on the module-level ``model``, ``tokenizer`` and
    ``custom_embedding`` objects, which are defined outside this block.

    Args:
        text: Text to synthesize.

    Returns:
        The path of a temporary ``.wav`` file written with a sample rate of
        16000 on success; ``None`` when ``text`` is blank, when the generated
        waveform is all zeros, or when the written file is empty; or a string
        of the form ``"Error: <message>"`` when any exception is raised.
    """
    try:
        if not text.strip():
            return None  # Reject blank input.

        # Run inference on whichever device the model already lives on.
        device = next(model.parameters()).device
        inputs = tokenizer(text, return_tensors="pt").to(device)
        speaker_embedding = custom_embedding.to(device)
        with torch.no_grad():
            outputs = model(**inputs, speaker_embeddings=speaker_embedding)
        waveform = outputs.waveform.squeeze().cpu().numpy()

        # Debug aid: report the waveform's shape, min and max.
        print(f"Waveform shape: {waveform.shape}, min: {waveform.min()}, max: {waveform.max()}")

        # Peak-normalize; an all-zero waveform cannot be scaled.
        max_val = max(abs(waveform.max()), abs(waveform.min()))
        if max_val > 0:
            waveform = waveform / max_val
        else:
            print("Warning: Waveform is all zeros")
            return None

        # Reserve a unique path, then close the handle right away so no file
        # descriptor is leaked; the file itself is kept (delete=False).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            wav_path = tmp.name
        try:
            sf.write(wav_path, waveform, 16000)
        except Exception:
            # Do not leave a stray temp file behind when writing fails.
            os.remove(wav_path)
            raise

        # Debug aid: report the generated file's path and size.
        wav_size = os.path.getsize(wav_path)
        print(f"Generated WAV file path: {wav_path}")
        print(f"WAV file size: {wav_size} bytes")

        if wav_size == 0:
            print("Warning: Generated WAV file is empty")
            os.remove(wav_path)  # An empty file is useless to the caller.
            return None

        return wav_path
    except Exception as e:
        # NOTE(review): the error text is returned in the same slot as a file
        # path, so callers cannot tell the two apart by type. Kept unchanged
        # for backward compatibility.
        return f"Error: {str(e)}"
 
1
+ import gradio as gr
2
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor
3
  import torch
4
  import soundfile as sf
5
+ import tempfile
6
+
7
+ # Load the processor and the model once, at import time, from the same model identifier
8
+ model_id = "jellecali8/somali_tts_model"
9
+ processor = AutoProcessor.from_pretrained(model_id)
10
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id)  # NOTE(review): this auto class is documented for speech-to-text heads; confirm it suits this TTS checkpoint
11
 
def tts(text):
    """Generate speech for ``text`` and return the path of a WAV file.

    Uses the module-level ``processor`` and ``model``.

    Args:
        text: Text to synthesize.

    Returns:
        Path of a temporary ``.wav`` file written with ``samplerate=16000``,
        or ``None`` when ``text`` is empty or whitespace-only. The file is
        created with ``delete=False`` and is not removed by this function.
    """
    # Blank input: there is nothing to synthesize, so skip the model call.
    if not text or not text.strip():
        return None

    inputs = processor(text, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(**inputs)
    # NOTE(review): assumes the first generated item is an audio waveform
    # sampled at 16 kHz - confirm against the checkpoint's configuration.
    audio = outputs[0].cpu().numpy()

    # Reserve a unique path and close the handle BEFORE writing: reopening a
    # still-open NamedTemporaryFile by name is platform-dependent.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        wav_path = f.name
    sf.write(wav_path, audio, samplerate=16000)
    return wav_path
22
+
# UI components: a two-line text box for the input and an audio player for
# the synthesized result.
_text_input = gr.Textbox(lines=2, label="Enter Somali Text")
_audio_output = gr.Audio(label="Generated Speech")

# Wire the components to the tts function.
iface = gr.Interface(
    fn=tts,
    inputs=_text_input,
    outputs=_audio_output,
    title="Somali TTS Demo",
    description="Ku qor qoraalka Somali, kadib dhageyso codka.",
)

# Start the app as soon as the module is executed.
iface.launch()