Veena
committed on
Commit
·
06301dc
1
Parent(s):
a596bee
Update Maya1 Gradio app with preset characters
Browse files
app.py
CHANGED
|
@@ -22,21 +22,21 @@ from maya1.constants import AUDIO_SAMPLE_RATE
|
|
| 22 |
|
| 23 |
# Preset characters (2 realistic + 2 creative)
|
| 24 |
PRESET_CHARACTERS = {
|
| 25 |
-
"
|
| 26 |
-
"description": "
|
| 27 |
-
"example_text": "
|
| 28 |
},
|
| 29 |
-
"
|
| 30 |
-
"description": "
|
| 31 |
-
"example_text": "<excited>
|
| 32 |
},
|
| 33 |
-
"
|
| 34 |
-
"description": "Creative,
|
| 35 |
-
"example_text": "
|
| 36 |
},
|
| 37 |
-
"
|
| 38 |
-
"description": "Creative
|
| 39 |
-
"example_text": "
|
| 40 |
}
|
| 41 |
}
|
| 42 |
|
|
@@ -54,6 +54,20 @@ def load_models():
|
|
| 54 |
if models_loaded:
|
| 55 |
return
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
print("Loading Maya1 model with vLLM...")
|
| 58 |
model = Maya1Model(
|
| 59 |
model_path="maya-research/maya1",
|
|
@@ -67,7 +81,7 @@ def load_models():
|
|
| 67 |
|
| 68 |
print("Loading SNAC decoder...")
|
| 69 |
snac_decoder = SNACDecoder(
|
| 70 |
-
device=
|
| 71 |
enable_batching=False,
|
| 72 |
)
|
| 73 |
|
|
@@ -224,16 +238,9 @@ with gr.Blocks(title="Maya1 - Open Source Emotional TTS", theme=gr.themes.Soft()
|
|
| 224 |
gr.Markdown("""
|
| 225 |
### Supported Emotions
|
| 226 |
|
| 227 |
-
`<angry>` `<
|
| 228 |
-
`<
|
| 229 |
-
`<
|
| 230 |
-
`<sing>` `<snort>` `<whisper>`
|
| 231 |
-
|
| 232 |
-
### Tips
|
| 233 |
-
- Use emotion tags naturally in your text
|
| 234 |
-
- Longer text needs more max_tokens
|
| 235 |
-
- Lower temperature for consistent results
|
| 236 |
-
- Presets are great starting points!
|
| 237 |
""")
|
| 238 |
|
| 239 |
# Event handlers
|
|
|
|
| 22 |
|
| 23 |
# Preset characters (2 realistic + 2 creative)
|
| 24 |
PRESET_CHARACTERS = {
|
| 25 |
+
"Male American": {
|
| 26 |
+
"description": "Male voice in their 30s with american accent",
|
| 27 |
+
"example_text": "Hello world <laugh_harder> this is amazing <giggle> I love it"
|
| 28 |
},
|
| 29 |
+
"Female British": {
|
| 30 |
+
"description": "Female voice in their 20s with british accent",
|
| 31 |
+
"example_text": "Welcome everyone <excited> let me tell you something <sigh> incredible"
|
| 32 |
},
|
| 33 |
+
"Robot": {
|
| 34 |
+
"description": "Creative, ai_machine_voice character. Male voice with robotic timbre",
|
| 35 |
+
"example_text": "System initialized <whisper> processing data <gasp> computation complete"
|
| 36 |
},
|
| 37 |
+
"Singer": {
|
| 38 |
+
"description": "Creative character. Female voice with smooth timbre",
|
| 39 |
+
"example_text": "Listen to this <sing> la la la <laugh> beautiful melody <giggle>"
|
| 40 |
}
|
| 41 |
}
|
| 42 |
|
|
|
|
| 54 |
if models_loaded:
|
| 55 |
return
|
| 56 |
|
| 57 |
+
import torch
|
| 58 |
+
import os
|
| 59 |
+
|
| 60 |
+
# Pick the compute device: use CUDA when available, otherwise warn and fall back to CPU
|
| 61 |
+
if not torch.cuda.is_available():
|
| 62 |
+
print("Warning: CUDA not available, using CPU")
|
| 63 |
+
device = "cpu"
|
| 64 |
+
else:
|
| 65 |
+
device = "cuda"
|
| 66 |
+
print(f"CUDA available: {torch.cuda.get_device_name(0)}")
|
| 67 |
+
|
| 68 |
+
# Default VLLM_USE_V1 to "0" for vLLM unless the environment already sets it
|
| 69 |
+
os.environ.setdefault("VLLM_USE_V1", "0")
|
| 70 |
+
|
| 71 |
print("Loading Maya1 model with vLLM...")
|
| 72 |
model = Maya1Model(
|
| 73 |
model_path="maya-research/maya1",
|
|
|
|
| 81 |
|
| 82 |
print("Loading SNAC decoder...")
|
| 83 |
snac_decoder = SNACDecoder(
|
| 84 |
+
device=device,
|
| 85 |
enable_batching=False,
|
| 86 |
)
|
| 87 |
|
|
|
|
| 238 |
gr.Markdown("""
|
| 239 |
### Supported Emotions
|
| 240 |
|
| 241 |
+
`<angry>` `<chuckle>` `<cry>` `<disappointed>` `<excited>` `<gasp>`
|
| 242 |
+
`<giggle>` `<laugh>` `<laugh_harder>` `<sarcastic>` `<sigh>`
|
| 243 |
+
`<sing>` `<whisper>`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
""")
|
| 245 |
|
| 246 |
# Event handlers
|