Update barks.py
barks.py (CHANGED)
@@ -1,4 +1,4 @@
-
+```python
 import os
 import torch
 import torchaudio
@@ -15,12 +15,13 @@ import warnings
 import random
 from transformers import AutoProcessor, BarkModel
 from accelerate import Accelerator
+import bitsandbytes as bnb
 
 # Suppress warnings for cleaner output
 warnings.filterwarnings("ignore")
 
 # Set PYTORCH_CUDA_ALLOC_CONF to manage memory fragmentation
-os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64"
 
 # Check critical dependencies
 if np.__version__ != "1.23.5":
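For context on the allocator change above: `max_split_size_mb` caps how large a cached block the CUDA caching allocator is willing to split, trading some allocation speed for less fragmentation. A minimal sketch of how the flag takes effect (the tensor size is an arbitrary example); it must be set before the first CUDA allocation:

```python
import os

# Must be set before the CUDA caching allocator initializes, i.e. before
# the first tensor is placed on the GPU.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:64"

import torch

x = torch.zeros(1024, device="cuda")  # allocator reads the setting here
```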
@@ -35,14 +36,18 @@ if device != "cuda":
     sys.exit(1)
 print(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
 
-# Initialize accelerator
-accelerator = Accelerator(mixed_precision="fp16")
+# Initialize accelerator with enhanced CPU offloading
+accelerator = Accelerator(mixed_precision="fp16", cpu_offload=True)
 
 # Pre-run memory cleanup
-torch.cuda.empty_cache()
-gc.collect()
-torch.cuda.ipc_collect()
-torch.cuda.synchronize()
+def aggressive_memory_cleanup():
+    torch.cuda.empty_cache()
+    gc.collect()
+    torch.cuda.ipc_collect()
+    torch.cuda.synchronize()
+    print("Performed aggressive memory cleanup.")
+
+aggressive_memory_cleanup()
 
 # 2) LOAD MODELS
 try:
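One caveat on the hunk above: current `accelerate` releases do not document a `cpu_offload` keyword on `Accelerator`, so this call may raise a `TypeError`. A hedged alternative sketch that keeps the fp16 setting and offloads a specific module instead (whether offloading suits this model is an assumption):

```python
from accelerate import Accelerator, cpu_offload

accelerator = Accelerator(mixed_precision="fp16")

# Per-module offload: weights stay in system RAM and are streamed to the
# GPU during each forward pass.
# bark_model = cpu_offload(bark_model, execution_device=accelerator.device)
```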
@@ -52,9 +57,9 @@ try:
         print(f"ERROR: Local model path {local_model_path} does not exist.")
         print("Please download the MusicGen medium model weights and place them in the correct directory.")
         sys.exit(1)
-    musicgen_model = MusicGen.get_pretrained(local_model_path, device=
+    musicgen_model = MusicGen.get_pretrained(local_model_path, device="cpu")  # Load to CPU initially
     musicgen_model.set_generation_params(
-        duration=
+        duration=5,  # Lower default chunk duration
         two_step_cfg=False  # Disable two-step CFG for stability
     )
 except Exception as e:
@@ -63,13 +68,12 @@ except Exception as e:
     sys.exit(1)
 
 try:
-    print("Loading Bark small model into system RAM...")
+    print("Loading Bark small model into system RAM with 4-bit quantization...")
     bark_processor = AutoProcessor.from_pretrained("suno/bark-small")
-    bark_model = BarkModel.from_pretrained("suno/bark-small")
-    bark_model = bark_model.to("cpu")  # Offload to CPU initially
+    bark_model = BarkModel.from_pretrained("suno/bark-small", load_in_4bit=True, device_map="cpu")  # Quantize and offload
 except Exception as e:
     print(f"ERROR: Failed to load Bark model: {e}")
-    print("Ensure Bark model weights
+    print("Ensure Bark model weights and bitsandbytes are installed.")
     sys.exit(1)
 
 # 3) RESOURCE MONITORING FUNCTION
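On the 4-bit load above: bitsandbytes quantized layers execute on CUDA, so `load_in_4bit=True` combined with `device_map="cpu"` is likely to fail at load time. A hedged sketch of the more conventional pattern, assuming a CUDA device is present:

```python
from transformers import BarkModel, BitsAndBytesConfig

quant_config = BitsAndBytesConfig(load_in_4bit=True)
bark_model = BarkModel.from_pretrained(
    "suno/bark-small",
    quantization_config=quant_config,
    device_map="auto",  # quantized modules are placed on the GPU
)
```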
@@ -78,15 +82,18 @@ def print_resource_usage(stage: str):
     print(f"GPU Memory Allocated: {torch.cuda.memory_allocated() / (1024**3):.2f} GB")
     print(f"GPU Memory Reserved: {torch.cuda.memory_reserved() / (1024**3):.2f} GB")
     print(f"CPU Memory Used: {psutil.virtual_memory().percent}%")
+    print(f"System RAM Available: {psutil.virtual_memory().available / (1024**3):.2f} GB")
     print("---------------")
 
 # Check available GPU memory
-def check_vram_availability(required_gb=
+def check_vram_availability(required_gb=3.0):  # Lowered threshold
     total_vram = torch.cuda.get_device_properties(0).total_memory / (1024**3)
     allocated_vram = torch.cuda.memory_allocated() / (1024**3)
     available_vram = total_vram - allocated_vram
     if available_vram < required_gb:
-        print(f"WARNING: Low VRAM available ({available_vram:.2f} GB
+        print(f"WARNING: Low VRAM available ({available_vram:.2f} GB < {required_gb:.2f} GB required).")
+        print("Reduce total_duration, chunk_duration, or enable more CPU offloading.")
+        print(f"Total VRAM: {total_vram:.2f} GB, Available: {available_vram:.2f} GB")
     return available_vram >= required_gb
 
 # 4) GENRE PROMPT FUNCTIONS
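A note on the VRAM check above: `total_memory - memory_allocated()` ignores the CUDA context and the blocks the caching allocator holds in reserve, so it can overstate what is actually free. If that matters, the driver's own numbers are available; a minimal sketch:

```python
import torch

free_bytes, total_bytes = torch.cuda.mem_get_info()
available_vram_gb = free_bytes / (1024**3)  # free memory as the driver sees it
```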
@@ -267,7 +274,7 @@ def generate_vocals(vocal_prompt: str, total_duration: int):
     try:
         print("Generating vocals with Bark...")
         # Move Bark model to GPU
-        bark_model =
+        bark_model = accelerator.prepare(bark_model)
 
         # Process vocal prompt
         inputs = bark_processor(vocal_prompt, return_tensors="pt").to(accelerator.device)
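For reference, a hedged sketch of how the prepared Bark model is typically driven through the Hugging Face API; the prompt text and the `inference_mode` wrapper are assumptions, not lines from this file:

```python
import torch

inputs = bark_processor("♪ la la la ♪", return_tensors="pt").to(accelerator.device)
with torch.inference_mode():
    audio = bark_model.generate(**inputs)
audio = audio.cpu().numpy().squeeze()               # 1-D float waveform
bark_sr = bark_model.generation_config.sample_rate  # 24 kHz for Bark
```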
@@ -291,7 +298,7 @@ def generate_vocals(vocal_prompt: str, total_duration: int):
 
         # Move Bark model back to CPU
         bark_model = bark_model.to("cpu")
-        torch.cuda.empty_cache()
+        aggressive_memory_cleanup()
 
         return vocal_segment, "✅ Vocals generated successfully."
     except Exception as e:
@@ -306,7 +313,7 @@ def generate_music(instrumental_prompt: str, vocal_prompt: str, cfg_scale: float
     try:
         start_time = time.time()
         total_duration = total_duration  # Validated by radio button (30, 60, 90, 120)
-        chunk_duration = min(max(chunk_duration, 5),
+        chunk_duration = min(max(chunk_duration, 5), 10)  # Lower max to 10s
        num_chunks = max(1, total_duration // chunk_duration)
         chunk_duration = total_duration / num_chunks
         overlap_duration = min(1.0, crossfade_duration / 1000.0)
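A worked example of the chunking arithmetic above, using values this UI allows (the specific numbers are illustrative):

```python
total_duration = 90                                    # seconds, from the radio button
chunk_duration = min(max(7, 5), 10)                    # user asked for 7 s -> stays 7
num_chunks = max(1, total_duration // chunk_duration)  # 90 // 7 = 12
chunk_duration = total_duration / num_chunks           # 90 / 12 = 7.5 s
# The recomputed chunk length always tiles the full duration exactly.
```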
@@ -314,14 +321,17 @@ def generate_music(instrumental_prompt: str, vocal_prompt: str, cfg_scale: float
         sample_rate = musicgen_model.sample_rate
         audio_segments = []
 
-        if not check_vram_availability(required_gb=
-            return None, "⚠️ Insufficient VRAM for generation.
+        if not check_vram_availability(required_gb=3.0):
+            return None, "⚠️ Insufficient VRAM for generation. Try reducing total_duration or chunk_duration further."
 
         print("Generating instrumental audio...")
         seed = 42
         torch.manual_seed(seed)
         np.random.seed(seed)
 
+        # Move MusicGen to GPU
+        musicgen_model = accelerator.prepare(musicgen_model)
+
         for i in range(num_chunks):
             chunk_prompt = instrumental_prompt
             print(f"Generating chunk {i+1}/{num_chunks} on GPU (prompt: {chunk_prompt})...")
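A hedged sketch of what each loop iteration presumably does with audiocraft's MusicGen API; wiring `cfg_scale` to `cfg_coef` here is an assumption. Note also that audiocraft's MusicGen is a wrapper class rather than a plain `nn.Module`, so passing it through `accelerator.prepare` as the hunk above does may need care; the sketch drives the wrapper directly:

```python
import torch

musicgen_model.set_generation_params(duration=chunk_duration, cfg_coef=cfg_scale)
with torch.no_grad():
    wav = musicgen_model.generate(descriptions=[chunk_prompt], progress=True)
wav = wav[0].cpu()  # (channels, samples) at musicgen_model.sample_rate
```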
@@ -360,13 +370,13 @@ def generate_music(instrumental_prompt: str, vocal_prompt: str, cfg_scale: float
             os.remove(temp_wav_path)
             audio_segments.append(segment)
 
-            torch.cuda.empty_cache()
-            gc.collect()
-            torch.cuda.ipc_collect()
-            torch.cuda.synchronize()
-            time.sleep(0.5)
+            aggressive_memory_cleanup()
             print_resource_usage(f"After Chunk {i+1} Generation")
 
+        # Move MusicGen back to CPU
+        musicgen_model = musicgen_model.to("cpu")
+        aggressive_memory_cleanup()
+
         print("Combining instrumental chunks...")
         final_segment = audio_segments[0]
         for i in range(1, len(audio_segments)):
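The combining loop that follows this hunk crossfades consecutive chunks; a minimal sketch of that step, assuming the segments are pydub `AudioSegment` objects and `crossfade_duration` is in milliseconds:

```python
final_segment = audio_segments[0]
for seg in audio_segments[1:]:
    # Overlap-blend the tail of the running mix with the head of the next chunk.
    final_segment = final_segment.append(seg, crossfade=crossfade_duration)
```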
@@ -405,14 +415,11 @@ def generate_music(instrumental_prompt: str, vocal_prompt: str, cfg_scale: float
     except Exception as e:
         return None, f"❌ Generation failed: {e}"
     finally:
-        torch.cuda.empty_cache()
-        gc.collect()
-        torch.cuda.ipc_collect()
-        torch.cuda.synchronize()
+        aggressive_memory_cleanup()
 
 # Function to clear inputs
 def clear_inputs():
-    return "", "", 3.0, 250, 0.9, 1.0, 30,
+    return "", "", 3.0, 250, 0.9, 1.0, 30, 5, 1000, 120, "none", "none", "none", "none", "none"
 
 # 8) CUSTOM CSS
 css = """
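The 15 values returned by `clear_inputs` must line up one-to-one with the Gradio components it resets. A hedged wiring sketch; the component names are assumptions inferred from the defaults, not identifiers confirmed by this diff:

```python
clear_button.click(
    fn=clear_inputs,
    inputs=None,
    outputs=[instrumental_prompt, vocal_prompt, cfg_scale, top_k, top_p,
             temperature, total_duration, chunk_duration, crossfade_duration,
             bpm, genre, mood, key, scale, voice_preset],  # hypothetical names
)
```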
@@ -560,7 +567,7 @@ with gr.Blocks(css=css) as demo:
             maximum=1.0,
             value=0.9,
             step=0.05,
-
+            info="Keeps tokens with cumulative probability above p."
         )
         temperature = gr.Slider(
             label="Temperature 🔥",
@@ -579,10 +586,10 @@ with gr.Blocks(css=css) as demo:
         chunk_duration = gr.Slider(
             label="Chunk Duration ⏱️ (seconds)",
             minimum=5,
-            maximum=
-            value=
+            maximum=10,
+            value=5,  # Lower default
             step=1,
-            info="Duration of each chunk to render (5 to
+            info="Duration of each chunk to render (5 to 10 seconds)."
         )
         crossfade_duration = gr.Slider(
             label="Crossfade Duration 🎶 (ms)",
@@ -686,3 +693,4 @@ try:
     fastapi_app.openapi_url = None
 except Exception:
     pass
+```
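One more observation on this commit: it adds a Markdown code fence as the first line of barks.py and a closing fence as the last line (new lines 1 and 696). Fences are not valid Python, so importing or running the module would fail immediately on the first line; a quick way to catch this (the path is an assumption):

```python
import py_compile

# Raises py_compile.PyCompileError on the leading fence if it ships in the file.
py_compile.compile("barks.py", doraise=True)
```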