Update barks.py
barks.py (CHANGED)
@@ -1,4 +1,4 @@
-
+```python
 import os
 import torch
 import torchaudio
@@ -15,12 +15,13 @@ import warnings
 import random
 from transformers import AutoProcessor, BarkModel
 from accelerate import Accelerator
+import bitsandbytes as bnb
 
 # Suppress warnings for cleaner output
 warnings.filterwarnings("ignore")
 
 # Set PYTORCH_CUDA_ALLOC_CONF to manage memory fragmentation
-os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64"
 
 # Check critical dependencies
 if np.__version__ != "1.23.5":
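For context on the allocator change above: `max_split_size_mb` caps how large a cached block the CUDA caching allocator is willing to split, trading some allocation speed for less fragmentation. A minimal sketch of how the flag takes effect (the tensor size is an arbitrary example); it must be set before the first CUDA allocation:

```python
import os

# Must be set before the CUDA caching allocator initializes, i.e. before
# the first tensor is placed on the GPU.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64"

import torch

x = torch.zeros(1024, device="cuda")  # allocator reads the setting here
```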
@@ -35,14 +36,18 @@ if device != "cuda":
     sys.exit(1)
 print(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
 
-# Initialize accelerator
-accelerator = Accelerator(mixed_precision="fp16")
+# Initialize accelerator with enhanced CPU offloading
+accelerator = Accelerator(mixed_precision="fp16", cpu_offload=True)
 
 # Pre-run memory cleanup
-torch.cuda.empty_cache()
-gc.collect()
-torch.cuda.ipc_collect()
-torch.cuda.synchronize()
+def aggressive_memory_cleanup():
+    torch.cuda.empty_cache()
+    gc.collect()
+    torch.cuda.ipc_collect()
+    torch.cuda.synchronize()
+    print("Performed aggressive memory cleanup.")
+
+aggressive_memory_cleanup()
 
 # 2) LOAD MODELS
 try:
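One caveat on the hunk above: current `accelerate` releases do not document a `cpu_offload` keyword on `Accelerator`, so this call may raise a `TypeError`. A hedged alternative sketch that keeps the fp16 setting and offloads a specific module instead (whether offloading suits this model is an assumption):

```python
from accelerate import Accelerator, cpu_offload

accelerator = Accelerator(mixed_precision="fp16")

# Per-module offload: weights stay in system RAM and are streamed to the
# GPU during each forward pass.
# bark_model = cpu_offload(bark_model, execution_device=accelerator.device)
```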
@@ -52,9 +57,9 @@ try:
         print(f"ERROR: Local model path {local_model_path} does not exist.")
         print("Please download the MusicGen medium model weights and place them in the correct directory.")
         sys.exit(1)
-    musicgen_model = MusicGen.get_pretrained(local_model_path, device=
+    musicgen_model = MusicGen.get_pretrained(local_model_path, device="cpu")  # Load to CPU initially
     musicgen_model.set_generation_params(
-        duration=
+        duration=5,  # Lower default chunk duration
         two_step_cfg=False  # Disable two-step CFG for stability
     )
 except Exception as e:
@@ -63,13 +68,12 @@ except Exception as e:
     sys.exit(1)
 
 try:
-    print("Loading Bark small model into system RAM...")
+    print("Loading Bark small model into system RAM with 4-bit quantization...")
     bark_processor = AutoProcessor.from_pretrained("suno/bark-small")
-    bark_model = BarkModel.from_pretrained("suno/bark-small")
-    bark_model = bark_model.to("cpu")  # Offload to CPU initially
+    bark_model = BarkModel.from_pretrained("suno/bark-small", load_in_4bit=True, device_map="cpu")  # Quantize and offload
 except Exception as e:
     print(f"ERROR: Failed to load Bark model: {e}")
-    print("Ensure Bark model weights
+    print("Ensure Bark model weights and bitsandbytes are installed.")
     sys.exit(1)
 
 # 3) RESOURCE MONITORING FUNCTION
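On the 4-bit load above: bitsandbytes quantized layers execute on CUDA, so `load_in_4bit=True` combined with `device_map="cpu"` is likely to fail at load time. A hedged sketch of the more conventional pattern, assuming a CUDA device is present:

```python
from transformers import BarkModel, BitsAndBytesConfig

quant_config = BitsAndBytesConfig(load_in_4bit=True)
bark_model = BarkModel.from_pretrained(
    "suno/bark-small",
    quantization_config=quant_config,
    device_map="auto",  # quantized modules are placed on the GPU
)
```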
@@ -78,15 +82,18 @@ def print_resource_usage(stage: str):
     print(f"GPU Memory Allocated: {torch.cuda.memory_allocated() / (1024**3):.2f} GB")
     print(f"GPU Memory Reserved: {torch.cuda.memory_reserved() / (1024**3):.2f} GB")
     print(f"CPU Memory Used: {psutil.virtual_memory().percent}%")
+    print(f"System RAM Available: {psutil.virtual_memory().available / (1024**3):.2f} GB")
     print("---------------")
 
 # Check available GPU memory
-def check_vram_availability(required_gb=
+def check_vram_availability(required_gb=3.0):  # Lowered threshold
     total_vram = torch.cuda.get_device_properties(0).total_memory / (1024**3)
     allocated_vram = torch.cuda.memory_allocated() / (1024**3)
     available_vram = total_vram - allocated_vram
     if available_vram < required_gb:
-        print(f"WARNING: Low VRAM available ({available_vram:.2f} GB
+        print(f"WARNING: Low VRAM available ({available_vram:.2f} GB < {required_gb:.2f} GB required).")
+        print("Reduce total_duration, chunk_duration, or enable more CPU offloading.")
+        print(f"Total VRAM: {total_vram:.2f} GB, Available: {available_vram:.2f} GB")
     return available_vram >= required_gb
 
 # 4) GENRE PROMPT FUNCTIONS
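A note on the VRAM check above: `total_memory - memory_allocated()` ignores the CUDA context and the blocks the caching allocator holds in reserve, so it can overstate what is actually free. If that matters, the driver's own numbers are available; a minimal sketch:

```python
import torch

free_bytes, total_bytes = torch.cuda.mem_get_info()
available_vram_gb = free_bytes / (1024**3)  # free memory as the driver sees it
```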
@@ -267,7 +274,7 @@ def generate_vocals(vocal_prompt: str, total_duration: int):
     try:
         print("Generating vocals with Bark...")
         # Move Bark model to GPU
-        bark_model =
+        bark_model = accelerator.prepare(bark_model)
 
         # Process vocal prompt
         inputs = bark_processor(vocal_prompt, return_tensors="pt").to(accelerator.device)
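For reference, a hedged sketch of how the prepared Bark model is typically driven through the Hugging Face API; the prompt text and the `inference_mode` wrapper are assumptions, not lines from this file:

```python
import torch

inputs = bark_processor("♪ la la la ♪", return_tensors="pt").to(accelerator.device)
with torch.inference_mode():
    audio = bark_model.generate(**inputs)
audio = audio.cpu().numpy().squeeze()               # 1-D float waveform
bark_sr = bark_model.generation_config.sample_rate  # 24 kHz for Bark
```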
@@ -291,7 +298,7 @@ def generate_vocals(vocal_prompt: str, total_duration: int):
 
         # Move Bark model back to CPU
         bark_model = bark_model.to("cpu")
-        torch.cuda.empty_cache()
+        aggressive_memory_cleanup()
 
         return vocal_segment, "✅ Vocals generated successfully."
     except Exception as e:
@@ -306,7 +313,7 @@ def generate_music(instrumental_prompt: str, vocal_prompt: str, cfg_scale: float
     try:
         start_time = time.time()
         total_duration = total_duration  # Validated by radio button (30, 60, 90, 120)
-        chunk_duration = min(max(chunk_duration, 5),
+        chunk_duration = min(max(chunk_duration, 5), 10)  # Lower max to 10s
        num_chunks = max(1, total_duration // chunk_duration)
         chunk_duration = total_duration / num_chunks
         overlap_duration = min(1.0, crossfade_duration / 1000.0)
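A worked example of the chunking arithmetic above, using values this UI allows (the specific numbers are illustrative):

```python
total_duration = 90                                    # seconds, from the radio button
chunk_duration = min(max(7, 5), 10)                    # user asked for 7 s -> stays 7
num_chunks = max(1, total_duration // chunk_duration)  # 90 // 7 = 12
chunk_duration = total_duration / num_chunks           # 90 / 12 = 7.5 s
# The recomputed chunk length always tiles the full duration exactly.
```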
@@ -314,14 +321,17 @@ def generate_music(instrumental_prompt: str, vocal_prompt: str, cfg_scale: float
         sample_rate = musicgen_model.sample_rate
         audio_segments = []
 
-        if not check_vram_availability(required_gb=
-            return None, "⚠️ Insufficient VRAM for generation.
+        if not check_vram_availability(required_gb=3.0):
+            return None, "⚠️ Insufficient VRAM for generation. Try reducing total_duration or chunk_duration further."
 
         print("Generating instrumental audio...")
         seed = 42
         torch.manual_seed(seed)
         np.random.seed(seed)
 
+        # Move MusicGen to GPU
+        musicgen_model = accelerator.prepare(musicgen_model)
+
         for i in range(num_chunks):
             chunk_prompt = instrumental_prompt
             print(f"Generating chunk {i+1}/{num_chunks} on GPU (prompt: {chunk_prompt})...")
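A hedged sketch of what each loop iteration presumably does with audiocraft's MusicGen API; wiring `cfg_scale` to `cfg_coef` here is an assumption. Note also that audiocraft's MusicGen is a wrapper class rather than a plain `nn.Module`, so passing it through `accelerator.prepare` as the hunk above does may need care; the sketch drives the wrapper directly:

```python
import torch

musicgen_model.set_generation_params(duration=chunk_duration, cfg_coef=cfg_scale)
with torch.no_grad():
    wav = musicgen_model.generate(descriptions=[chunk_prompt], progress=True)
wav = wav[0].cpu()  # (channels, samples) at musicgen_model.sample_rate
```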
@@ -360,13 +370,13 @@ def generate_music(instrumental_prompt: str, vocal_prompt: str, cfg_scale: float
             os.remove(temp_wav_path)
             audio_segments.append(segment)
 
-            torch.cuda.empty_cache()
-            gc.collect()
-            torch.cuda.ipc_collect()
-            torch.cuda.synchronize()
-            time.sleep(0.5)
+            aggressive_memory_cleanup()
             print_resource_usage(f"After Chunk {i+1} Generation")
 
+        # Move MusicGen back to CPU
+        musicgen_model = musicgen_model.to("cpu")
+        aggressive_memory_cleanup()
+
         print("Combining instrumental chunks...")
         final_segment = audio_segments[0]
         for i in range(1, len(audio_segments)):
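The combining loop that follows this hunk crossfades consecutive chunks; a minimal sketch of that step, assuming the segments are pydub `AudioSegment` objects and `crossfade_duration` is in milliseconds:

```python
final_segment = audio_segments[0]
for seg in audio_segments[1:]:
    # Overlap-blend the tail of the running mix with the head of the next chunk.
    final_segment = final_segment.append(seg, crossfade=crossfade_duration)
```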
@@ -405,14 +415,11 @@ def generate_music(instrumental_prompt: str, vocal_prompt: str, cfg_scale: float
     except Exception as e:
         return None, f"❌ Generation failed: {e}"
     finally:
-        torch.cuda.empty_cache()
-        gc.collect()
-        torch.cuda.ipc_collect()
-        torch.cuda.synchronize()
+        aggressive_memory_cleanup()
 
 # Function to clear inputs
 def clear_inputs():
-    return "", "", 3.0, 250, 0.9, 1.0, 30,
+    return "", "", 3.0, 250, 0.9, 1.0, 30, 5, 1000, 120, "none", "none", "none", "none", "none"
 
 # 8) CUSTOM CSS
 css = """
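The 15 values returned by `clear_inputs` must line up one-to-one with the Gradio components it resets. A hedged wiring sketch; the component names are assumptions inferred from the defaults, not identifiers confirmed by this diff:

```python
clear_button.click(
    fn=clear_inputs,
    inputs=None,
    outputs=[instrumental_prompt, vocal_prompt, cfg_scale, top_k, top_p,
             temperature, total_duration, chunk_duration, crossfade_duration,
             bpm, genre, mood, key, scale, voice_preset],  # hypothetical names
)
```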
@@ -560,7 +567,7 @@ with gr.Blocks(css=css) as demo:
             maximum=1.0,
             value=0.9,
             step=0.05,
-
+            info="Keeps tokens with cumulative probability above p."
         )
         temperature = gr.Slider(
             label="Temperature 🔥",
@@ -579,10 +586,10 @@ with gr.Blocks(css=css) as demo:
         chunk_duration = gr.Slider(
             label="Chunk Duration ⏱️ (seconds)",
             minimum=5,
-            maximum=
-            value=
+            maximum=10,
+            value=5,  # Lower default
             step=1,
-            info="Duration of each chunk to render (5 to
+            info="Duration of each chunk to render (5 to 10 seconds)."
         )
         crossfade_duration = gr.Slider(
             label="Crossfade Duration 🎶 (ms)",
@@ -686,3 +693,4 @@ try:
     fastapi_app.openapi_url = None
 except Exception:
     pass
+```
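One more observation on this commit: it adds a Markdown code fence as the first line of barks.py and a closing fence as the last line (new lines 1 and 696). Fences are not valid Python, so importing or running the module would fail immediately on the first line; a quick way to catch this (the path is an assumption):

```python
import py_compile

# Raises py_compile.PyCompileError on the leading fence if it ships in the file.
py_compile.compile("barks.py", doraise=True)
```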