Veena committed on
Commit
06301dc
·
1 Parent(s): a596bee

Update Maya1 Gradio app with preset characters

Browse files
Files changed (1) hide show
  1. app.py +30 -23
app.py CHANGED
@@ -22,21 +22,21 @@ from maya1.constants import AUDIO_SAMPLE_RATE
22
 
23
  # Preset characters (2 realistic + 2 creative)
24
  PRESET_CHARACTERS = {
25
- "Realistic: Sarcastic Male (American)": {
26
- "description": "Realistic male voice in the 30s age with a american accent. Low pitch, nasally timbre, conversational pacing, sarcastic tone delivery at low intensity, commercial domain, product_demo_voice role, formal delivery",
27
- "example_text": "<sarcastic> He really stood up there and said we need to <chuckle> save the world. <sigh> What a joke."
28
  },
29
- "Realistic: Excited Female (Asian-American)": {
30
- "description": "Realistic female voice in the 20s age with a asian_american accent. Normal pitch, smooth timbre, conversational pacing, neutral tone delivery at high intensity, viral_content domain, meme_voice role, formal delivery",
31
- "example_text": "<excited> I am issuing a formal commendation for this particular item! It has exceeded all established metrics for excellence. <gasp> This is something I would actually spend my own money on. <laugh> Seriously!"
32
  },
33
- "Creative: Alpha Leader (Indian)": {
34
- "description": "Creative, alpha character. Male voice in their 30s with a indian accent. Normal pitch, nasally timbre, very_fast pacing, energetic tone at medium intensity.",
35
- "example_text": "<angry> I don't want to hear excuses, I only want to see solutions! <sigh> Get your teams together, brainstorm for thirty minutes, and come back to me with a plan. <excited> Now move!"
36
  },
37
- "Creative: Vampire (Middle Eastern)": {
38
- "description": "Creative, vampire character. Male voice in their 40s with a middle_eastern accent. Low pitch, nasally timbre, very_slow pacing, excited tone at medium intensity.",
39
- "example_text": "<whisper> Soon you will join me in this magnificent eternal darkness. <laugh> And we shall feast upon the world together, <excited> bound by this exquisite night forever. <mischievous>"
40
  }
41
  }
42
 
@@ -54,6 +54,20 @@ def load_models():
54
  if models_loaded:
55
  return
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  print("Loading Maya1 model with vLLM...")
58
  model = Maya1Model(
59
  model_path="maya-research/maya1",
@@ -67,7 +81,7 @@ def load_models():
67
 
68
  print("Loading SNAC decoder...")
69
  snac_decoder = SNACDecoder(
70
- device="cuda",
71
  enable_batching=False,
72
  )
73
 
@@ -224,16 +238,9 @@ with gr.Blocks(title="Maya1 - Open Source Emotional TTS", theme=gr.themes.Soft()
224
  gr.Markdown("""
225
  ### Supported Emotions
226
 
227
- `<angry>` `<appalled>` `<chuckle>` `<cry>` `<curious>` `<disappointed>`
228
- `<excited>` `<exhale>` `<gasp>` `<giggle>` `<gulp>` `<laugh>`
229
- `<laugh_harder>` `<mischievous>` `<sarcastic>` `<scream>` `<sigh>`
230
- `<sing>` `<snort>` `<whisper>`
231
-
232
- ### Tips
233
- - Use emotion tags naturally in your text
234
- - Longer text needs more max_tokens
235
- - Lower temperature for consistent results
236
- - Presets are great starting points!
237
  """)
238
 
239
  # Event handlers
 
22
 
23
  # Preset characters (2 realistic + 2 creative)
24
  PRESET_CHARACTERS = {
25
+ "Male American": {
26
+ "description": "Male voice in their 30s with american accent",
27
+ "example_text": "Hello world <laugh_harder> this is amazing <giggle> I love it"
28
  },
29
+ "Female British": {
30
+ "description": "Female voice in their 20s with british accent",
31
+ "example_text": "Welcome everyone <excited> let me tell you something <sigh> incredible"
32
  },
33
+ "Robot": {
34
+ "description": "Creative, ai_machine_voice character. Male voice with robotic timbre",
35
+ "example_text": "System initialized <whisper> processing data <gasp> computation complete"
36
  },
37
+ "Singer": {
38
+ "description": "Creative character. Female voice with smooth timbre",
39
+ "example_text": "Listen to this <sing> la la la <laugh> beautiful melody <giggle>"
40
  }
41
  }
42
 
 
54
  if models_loaded:
55
  return
56
 
57
+ import torch
58
+ import os
59
+
60
+ # Prefer CUDA on HF Spaces; fall back to CPU (with a warning) when it is unavailable
61
+ if not torch.cuda.is_available():
62
+ print("Warning: CUDA not available, using CPU")
63
+ device = "cpu"
64
+ else:
65
+ device = "cuda"
66
+ print(f"CUDA available: {torch.cuda.get_device_name(0)}")
67
+
68
+ # Set environment variable for vLLM
69
+ os.environ.setdefault("VLLM_USE_V1", "0")
70
+
71
  print("Loading Maya1 model with vLLM...")
72
  model = Maya1Model(
73
  model_path="maya-research/maya1",
 
81
 
82
  print("Loading SNAC decoder...")
83
  snac_decoder = SNACDecoder(
84
+ device=device,
85
  enable_batching=False,
86
  )
87
 
 
238
  gr.Markdown("""
239
  ### Supported Emotions
240
 
241
+ `<angry>` `<chuckle>` `<cry>` `<disappointed>` `<excited>` `<gasp>`
242
+ `<giggle>` `<laugh>` `<laugh_harder>` `<sarcastic>` `<sigh>`
243
+ `<sing>` `<whisper>`
 
 
 
 
 
 
 
244
  """)
245
 
246
  # Event handlers