maya1

Running

App Files Community

Veena committed 9 days ago

Commit

a596bee

1 Parent(s): 002a88c

Update Maya1 Gradio app with preset characters

Browse files

Files changed (1) hide show

app.py +34 -28

app.py CHANGED Viewed

@@ -45,34 +45,37 @@ model = None
 prompt_builder = None
 snac_decoder = None
 pipeline = None
-@spaces.GPU
-async def load_models():
     """Load Maya1 vLLM model and pipeline (runs once)."""
-    global model, prompt_builder, snac_decoder, pipeline
-    if model is None:
-        print("Loading Maya1 model with vLLM...")
-        model = Maya1Model(
-            model_path="maya-research/maya1",
-            dtype="bfloat16",
-            max_model_len=8192,
-            gpu_memory_utilization=0.85,
-        )
-        print("Initializing prompt builder...")
-        prompt_builder = Maya1PromptBuilder(model.tokenizer, model)
-        print("Loading SNAC decoder...")
-        snac_decoder = SNACDecoder(
-            device="cuda",
-            enable_batching=False,
-        )
-        print("Initializing pipeline...")
-        pipeline = Maya1Pipeline(model, prompt_builder, snac_decoder)
-        print("Models loaded successfully!")
 def preset_selected(preset_name):
     """Update description and text when preset is selected."""
@@ -86,7 +89,7 @@ def generate_speech(preset_name, description, text, temperature, max_tokens):
     """Generate emotional speech from description and text using vLLM."""
     try:
         # Load models if not already loaded
-        asyncio.run(load_models())
         # If using preset, override description
         if preset_name and preset_name in PRESET_CHARACTERS:
@@ -98,8 +101,10 @@ def generate_speech(preset_name, description, text, temperature, max_tokens):
         print(f"Generating with temperature={temperature}, max_tokens={max_tokens}...")
-        # Generate audio using vLLM pipeline
-        audio_bytes = asyncio.run(
             pipeline.generate_speech(
                 description=description,
                 text=text,
@@ -110,6 +115,7 @@ def generate_speech(preset_name, description, text, temperature, max_tokens):
                 seed=None,
             )
         )
         if audio_bytes is None:
             return None, "Error: Audio generation failed. Try different text or increase max_tokens."

 prompt_builder = None
 snac_decoder = None
 pipeline = None
+models_loaded = False
+def load_models():
     """Load Maya1 vLLM model and pipeline (runs once)."""
+    global model, prompt_builder, snac_decoder, pipeline, models_loaded
+    if models_loaded:
+        return
+    print("Loading Maya1 model with vLLM...")
+    model = Maya1Model(
+        model_path="maya-research/maya1",
+        dtype="bfloat16",
+        max_model_len=8192,
+        gpu_memory_utilization=0.85,
+    )
+    print("Initializing prompt builder...")
+    prompt_builder = Maya1PromptBuilder(model.tokenizer, model)
+    print("Loading SNAC decoder...")
+    snac_decoder = SNACDecoder(
+        device="cuda",
+        enable_batching=False,
+    )
+    print("Initializing pipeline...")
+    pipeline = Maya1Pipeline(model, prompt_builder, snac_decoder)
+    models_loaded = True
+    print("Models loaded successfully!")
 def preset_selected(preset_name):
     """Update description and text when preset is selected."""
     """Generate emotional speech from description and text using vLLM."""
     try:
         # Load models if not already loaded
+        load_models()
         # If using preset, override description
         if preset_name and preset_name in PRESET_CHARACTERS:
         print(f"Generating with temperature={temperature}, max_tokens={max_tokens}...")
+        # Generate audio using vLLM pipeline (async wrapper)
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        audio_bytes = loop.run_until_complete(
             pipeline.generate_speech(
                 description=description,
                 text=text,
                 seed=None,
             )
         )
+        loop.close()
         if audio_bytes is None:
             return None, "Error: Audio generation failed. Try different text or increase max_tokens."