maya1

Running

App Files Files Community

Veena committed 8 days ago

Commit

06301dc

1 Parent(s): a596bee

Update Maya1 Gradio app with preset characters

Browse files

Files changed (1) hide show

app.py +30 -23

app.py CHANGED Viewed

@@ -22,21 +22,21 @@ from maya1.constants import AUDIO_SAMPLE_RATE
 # Preset characters (2 realistic + 2 creative)
 PRESET_CHARACTERS = {
-    "Realistic: Sarcastic Male (American)": {
-        "description": "Realistic male voice in the 30s age with a american accent. Low pitch, nasally timbre, conversational pacing, sarcastic tone delivery at low intensity, commercial domain, product_demo_voice role, formal delivery",
-        "example_text": "<sarcastic> He really stood up there and said we need to <chuckle> save the world. <sigh> What a joke."
     },
-    "Realistic: Excited Female (Asian-American)": {
-        "description": "Realistic female voice in the 20s age with a asian_american accent. Normal pitch, smooth timbre, conversational pacing, neutral tone delivery at high intensity, viral_content domain, meme_voice role, formal delivery",
-        "example_text": "<excited> I am issuing a formal commendation for this particular item! It has exceeded all established metrics for excellence. <gasp> This is something I would actually spend my own money on. <laugh> Seriously!"
     },
-    "Creative: Alpha Leader (Indian)": {
-        "description": "Creative, alpha character. Male voice in their 30s with a indian accent. Normal pitch, nasally timbre, very_fast pacing, energetic tone at medium intensity.",
-        "example_text": "<angry> I don't want to hear excuses, I only want to see solutions! <sigh> Get your teams together, brainstorm for thirty minutes, and come back to me with a plan. <excited> Now move!"
     },
-    "Creative: Vampire (Middle Eastern)": {
-        "description": "Creative, vampire character. Male voice in their 40s with a middle_eastern accent. Low pitch, nasally timbre, very_slow pacing, excited tone at medium intensity.",
-        "example_text": "<whisper> Soon you will join me in this magnificent eternal darkness. <laugh> And we shall feast upon the world together, <excited> bound by this exquisite night forever. <mischievous>"
     }
 }
@@ -54,6 +54,20 @@ def load_models():
     if models_loaded:
         return
     print("Loading Maya1 model with vLLM...")
     model = Maya1Model(
         model_path="maya-research/maya1",
@@ -67,7 +81,7 @@ def load_models():
     print("Loading SNAC decoder...")
     snac_decoder = SNACDecoder(
-        device="cuda",
         enable_batching=False,
     )
@@ -224,16 +238,9 @@ with gr.Blocks(title="Maya1 - Open Source Emotional TTS", theme=gr.themes.Soft()
             gr.Markdown("""
             ### Supported Emotions
-            `<angry>` `<appalled>` `<chuckle>` `<cry>` `<curious>` `<disappointed>`
-            `<excited>` `<exhale>` `<gasp>` `<giggle>` `<gulp>` `<laugh>`
-            `<laugh_harder>` `<mischievous>` `<sarcastic>` `<scream>` `<sigh>`
-            `<sing>` `<snort>` `<whisper>`
-            ### Tips
-            - Use emotion tags naturally in your text
-            - Longer text needs more max_tokens
-            - Lower temperature for consistent results
-            - Presets are great starting points!
             """)
     # Event handlers

 # Preset characters (2 realistic + 2 creative)
 PRESET_CHARACTERS = {
+    "Male American": {
+        "description": "Male voice in their 30s with american accent",
+        "example_text": "Hello world <laugh_harder> this is amazing <giggle> I love it"
     },
+    "Female British": {
+        "description": "Female voice in their 20s with british accent",
+        "example_text": "Welcome everyone <excited> let me tell you something <sigh> incredible"
     },
+    "Robot": {
+        "description": "Creative, ai_machine_voice character. Male voice with robotic timbre",
+        "example_text": "System initialized <whisper> processing data <gasp> computation complete"
     },
+    "Singer": {
+        "description": "Creative character. Female voice with smooth timbre",
+        "example_text": "Listen to this <sing> la la la <laugh> beautiful melody <giggle>"
     }
 }
     if models_loaded:
         return
+    import torch
+    import os
+    # Prefer CUDA (expected on HF Spaces); fall back to CPU with a warning when it is unavailable
+    if not torch.cuda.is_available():
+        print("Warning: CUDA not available, using CPU")
+        device = "cpu"
+    else:
+        device = "cuda"
+        print(f"CUDA available: {torch.cuda.get_device_name(0)}")
+    # Default VLLM_USE_V1 to "0" for vLLM unless the variable is already set in the environment
+    os.environ.setdefault("VLLM_USE_V1", "0")
     print("Loading Maya1 model with vLLM...")
     model = Maya1Model(
         model_path="maya-research/maya1",
     print("Loading SNAC decoder...")
     snac_decoder = SNACDecoder(
+        device=device,
         enable_batching=False,
     )
             gr.Markdown("""
             ### Supported Emotions
+            `<angry>` `<chuckle>` `<cry>` `<disappointed>` `<excited>` `<gasp>`
+            `<giggle>` `<laugh>` `<laugh_harder>` `<sarcastic>` `<sigh>`
+            `<sing>` `<whisper>`
             """)
     # Event handlers