Update app.py
app.py
CHANGED
@@ -47,59 +47,75 @@ def print_resource_usage(stage: str):
     print(f"GPU Memory Reserved: {torch.cuda.memory_reserved() / (1024**3):.2f} GB")
     print("---------------")

-# 4) GENRE PROMPT FUNCTIONS (
+# 4) GENRE PROMPT FUNCTIONS (Redesigned for better track generation)
+def set_classic_rock_prompt():
+    return "Classic rock with bluesy electric guitars, steady drums, groovy bass, Hammond organ fills, and a Led Zeppelin-inspired raw energy, maintaining a cohesive structure with dynamic solos and powerful choruses."
+
+def set_alternative_rock_prompt():
+    return "Alternative rock with distorted guitar riffs, punchy drums, melodic basslines, atmospheric synths, and a Nirvana-inspired grunge vibe, featuring introspective verses and explosive choruses."
+
+def set_detroit_techno_prompt():
+    return "Detroit techno with deep pulsing synths, driving basslines, crisp hi-hats, atmospheric pads, and a rhythmic groove inspired by Juan Atkins, maintaining a hypnotic and energetic flow."
+
+def set_deep_house_prompt():
+    return "Deep house with warm analog synth chords, soulful vocal chops, deep basslines, crisp hi-hats, and a laid-back groove inspired by Larry Heard, creating a consistent hypnotic vibe with smooth transitions."
+
+def set_smooth_jazz_prompt():
+    return "Smooth jazz with warm saxophone leads, expressive Rhodes piano chords, soft bossa nova drums, upright bass, and a George Benson-inspired improvisational feel, maintaining a cohesive and relaxing vibe."
+
+def set_bebop_jazz_prompt():
+    return "Bebop jazz with fast-paced saxophone solos, intricate piano runs, walking basslines, complex drum patterns, and a Charlie Parker-inspired improvisational style, featuring dynamic shifts and virtuosic performances."
+
+def set_baroque_classical_prompt():
+    return "Baroque classical with harpsichord, delicate violin, cello, flute, and a Vivaldi-inspired melodic structure, featuring intricate counterpoint and elegant ornamentation, maintaining a consistent baroque elegance."
+
+def set_romantic_classical_prompt():
+    return "Romantic classical with lush strings, expressive piano, dramatic brass, subtle woodwinds, and a Chopin-inspired melodic flow, building emotional intensity with sweeping crescendos and delicate pianissimos."
+
+def set_boom_bap_hiphop_prompt():
+    return "Boom bap hip-hop with gritty sampled drums, deep basslines, jazzy piano loops, vinyl scratches, and a J Dilla-inspired rhythmic groove, maintaining a consistent head-nodding vibe."
+
+def set_trap_hiphop_prompt():
+    return "Trap hip-hop with hard-hitting 808 bass, snappy snares, rapid hi-hats, eerie synth melodies, and a modern Atlanta-inspired sound, featuring catchy hooks and energetic drops."

 def set_pop_rock_prompt():
-    return "Pop rock with catchy electric guitar riffs, uplifting
+    return "Pop rock with catchy electric guitar riffs, uplifting synths, steady drums, melodic basslines, and a Coldplay-inspired anthemic feel, featuring bright intros and powerful choruses."

 def set_fusion_jazz_prompt():
-    return "Fusion jazz with electric piano, funky basslines, intricate drum patterns, soaring trumpet, and a Herbie Hancock-inspired groove,
+    return "Fusion jazz with electric piano, funky basslines, intricate drum patterns, soaring trumpet, and a Herbie Hancock-inspired groove, blending jazz improvisation with rock and funk elements."
+
+def set_edm_prompt():
+    return "EDM with high-energy synth leads, pounding basslines, four-on-the-floor kicks, euphoric breakdowns, and a festival-ready drop, inspired by artists like Avicii and Calvin Harris."
+
+def set_indie_folk_prompt():
+    return "Indie folk with acoustic guitars, heartfelt vocals, gentle percussion, warm bass, and a Bon Iver-inspired intimate atmosphere, featuring layered harmonies and emotional crescendos."

-# 5) AUDIO PROCESSING FUNCTIONS
+# 5) AUDIO PROCESSING FUNCTIONS (Unchanged)
 def apply_chorus(segment):
-    delayed = segment - 6 # Reduced gain to -6 dB for a subtler effect
+    delayed = segment - 6
     delayed = delayed.set_frame_rate(segment.frame_rate)
     return segment.overlay(delayed, position=20)

 def apply_eq(segment):
-    segment = segment.high_pass_filter(80) # Lowered cutoff to 80Hz for deeper bass
+    segment = segment.low_pass_filter(8000)
+    segment = segment.high_pass_filter(80)
     return segment

 def apply_limiter(segment, max_db=-3.0):
-    # Apply limiter with a higher threshold to preserve dynamics
     if segment.dBFS > max_db:
         segment = segment - (segment.dBFS - max_db)
     return segment

 def apply_final_gain(segment, target_db=-12.0):
-    # Add final gain adjustment for consistent loudness
     gain_adjustment = target_db - segment.dBFS
     return segment + gain_adjustment

+def apply_fade(segment, fade_in_duration=2000, fade_out_duration=2000):
+    segment = segment.fade_in(fade_in_duration)
+    segment = segment.fade_out(fade_out_duration)
+    return segment
+
+# 6) GENERATION & I/O FUNCTIONS (Unchanged)
 def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p: float, temperature: float, total_duration: int, crossfade_duration: int):
     global musicgen_model
     if not instrumental_prompt.strip():
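All of the audio helpers in this hunk operate on pydub AudioSegment objects, so they chain directly. The sketch below shows one way to compose them; it assumes the helpers are in scope, that "final.mp3" is a hypothetical input file, and that ffmpeg is installed for pydub's MP3 I/O. The commit adds apply_fade but this diff does not show where it is called, so its position in the chain is an assumption.

# Usage sketch (not part of app.py): chaining the pydub helpers above.
# Assumes the helper functions are in scope and ffmpeg is available.
from pydub import AudioSegment

track = AudioSegment.from_mp3("final.mp3")   # hypothetical input file
track = apply_eq(track)          # band-limit: low-pass at 8 kHz, high-pass at 80 Hz
track = apply_chorus(track)      # overlay a -6 dB copy offset by 20 ms
track = apply_limiter(track)     # pull the level down if it exceeds -3 dBFS
track = apply_fade(track)        # 2 s fade-in and fade-out (new helper; call site assumed)
track = apply_final_gain(track)  # bring average loudness to about -12 dBFS
track.export("final_processed.mp3", format="mp3")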
@@ -107,27 +123,22 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
     try:
         start_time = time.time()

-        # Ensure total duration is within reasonable bounds (up to 90 seconds for longer tracks)
         total_duration = min(max(total_duration, 10), 90)
         chunk_duration = 15
-        # Use 2 chunks for durations up to 30 seconds, 3 chunks for longer durations
         num_chunks = 2 if total_duration <= 30 else 3
         chunk_duration = total_duration / num_chunks

-        # Generate slightly longer chunks for overlap
         overlap_duration = min(1.0, crossfade_duration / 1000.0)
         generation_duration = chunk_duration + overlap_duration

         audio_chunks = []
         sample_rate = musicgen_model.sample_rate

-        torch.manual_seed(42) # Fixed seed for reproducibility
+        torch.manual_seed(42)
         np.random.seed(42)

-        # Generate audio in chunks with a consistent prompt
         for i in range(num_chunks):
-            chunk_prompt = instrumental_prompt
+            chunk_prompt = instrumental_prompt
             print(f"Generating chunk {i+1}/{num_chunks} on GPU (prompt: {chunk_prompt})...")
             musicgen_model.set_generation_params(
                 duration=generation_duration,
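The chunk planning in this hunk is plain arithmetic and is easy to sanity-check in isolation. The sketch below restates it as a pure function (plan_chunks is a name invented here for illustration; app.py keeps the logic inline in generate_music): the requested length is clamped to 10-90 s, split into 2 or 3 chunks, and each chunk is generated slightly long so the crossfade has material to overlap.

# Illustration only: the chunk-planning arithmetic from generate_music as a pure function.
def plan_chunks(total_duration: int, crossfade_duration_ms: int):
    total_duration = min(max(total_duration, 10), 90)    # clamp to 10-90 seconds
    num_chunks = 2 if total_duration <= 30 else 3        # 2 chunks up to 30 s, else 3
    chunk_duration = total_duration / num_chunks         # seconds kept per chunk
    overlap = min(1.0, crossfade_duration_ms / 1000.0)   # at most 1 s of extra audio
    generation_duration = chunk_duration + overlap       # what MusicGen is asked to render
    return num_chunks, chunk_duration, generation_duration

print(plan_chunks(30, 500))    # (2, 15.0, 15.5)
print(plan_chunks(90, 2000))   # (3, 30.0, 31.0)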
@@ -171,18 +182,15 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
             time.sleep(0.5)
             print_resource_usage(f"After Chunk {i+1} Generation")

-        # Combine chunks with crossfade
         print("Combining audio chunks...")
         final_segment = AudioSegment.from_mp3(audio_chunks[0])
         for i in range(1, len(audio_chunks)):
             next_segment = AudioSegment.from_mp3(audio_chunks[i])
-            next_segment = next_segment + 1
+            next_segment = next_segment + 1
             final_segment = final_segment.append(next_segment, crossfade=crossfade_duration)

-        # Trim to exact total duration
         final_segment = final_segment[:total_duration * 1000]

-        # Post-process with improved dynamics
         print("Post-processing final track...")
         final_segment = apply_eq(final_segment)
         final_segment = apply_chorus(final_segment)
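The join step relies on pydub's AudioSegment.append, which overlaps the tail of the running mix with the head of the next chunk over crossfade milliseconds, and on millisecond slicing to trim the result. A minimal sketch under those assumptions, with chunk_paths as a hypothetical list of the chunk MP3 files and ffmpeg available:

# Sketch of the crossfade join, assuming chunk_paths lists the chunk MP3 files.
from pydub import AudioSegment

def join_chunks(chunk_paths, crossfade_ms=1000, total_duration_s=30):
    combined = AudioSegment.from_mp3(chunk_paths[0])
    for path in chunk_paths[1:]:
        nxt = AudioSegment.from_mp3(path) + 1  # +1 dB nudge, mirroring the diff
        # append() overlaps the last crossfade_ms of `combined` with the start of `nxt`
        combined = combined.append(nxt, crossfade=crossfade_ms)
    return combined[:total_duration_s * 1000]  # pydub slices in milliseconds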
@@ -361,15 +369,20 @@ with gr.Blocks(css=css) as demo:
                 elem_classes="textbox"
             )
             with gr.Row(elem_classes="genre-buttons"):
+                classic_rock_btn = gr.Button("Classic Rock", elem_classes="genre-btn")
+                alternative_rock_btn = gr.Button("Alternative Rock", elem_classes="genre-btn")
+                detroit_techno_btn = gr.Button("Detroit Techno", elem_classes="genre-btn")
+                deep_house_btn = gr.Button("Deep House", elem_classes="genre-btn")
+                smooth_jazz_btn = gr.Button("Smooth Jazz", elem_classes="genre-btn")
+                bebop_jazz_btn = gr.Button("Bebop Jazz", elem_classes="genre-btn")
+                baroque_classical_btn = gr.Button("Baroque Classical", elem_classes="genre-btn")
+                romantic_classical_btn = gr.Button("Romantic Classical", elem_classes="genre-btn")
+                boom_bap_hiphop_btn = gr.Button("Boom Bap Hip-Hop", elem_classes="genre-btn")
+                trap_hiphop_btn = gr.Button("Trap Hip-Hop", elem_classes="genre-btn")
                 pop_rock_btn = gr.Button("Pop Rock", elem_classes="genre-btn")
                 fusion_jazz_btn = gr.Button("Fusion Jazz", elem_classes="genre-btn")
+                edm_btn = gr.Button("EDM", elem_classes="genre-btn")
+                indie_folk_btn = gr.Button("Indie Folk", elem_classes="genre-btn")

         with gr.Column(elem_classes="settings-container"):
             cfg_scale = gr.Slider(
@@ -428,15 +441,20 @@
             out_audio = gr.Audio(label="Generated Stereo Instrumental Track", type="filepath")
             status = gr.Textbox(label="Status", interactive=False)

+    classic_rock_btn.click(set_classic_rock_prompt, inputs=None, outputs=[instrumental_prompt])
+    alternative_rock_btn.click(set_alternative_rock_prompt, inputs=None, outputs=[instrumental_prompt])
+    detroit_techno_btn.click(set_detroit_techno_prompt, inputs=None, outputs=[instrumental_prompt])
+    deep_house_btn.click(set_deep_house_prompt, inputs=None, outputs=[instrumental_prompt])
+    smooth_jazz_btn.click(set_smooth_jazz_prompt, inputs=None, outputs=[instrumental_prompt])
+    bebop_jazz_btn.click(set_bebop_jazz_prompt, inputs=None, outputs=[instrumental_prompt])
+    baroque_classical_btn.click(set_baroque_classical_prompt, inputs=None, outputs=[instrumental_prompt])
+    romantic_classical_btn.click(set_romantic_classical_prompt, inputs=None, outputs=[instrumental_prompt])
+    boom_bap_hiphop_btn.click(set_boom_bap_hiphop_prompt, inputs=None, outputs=[instrumental_prompt])
+    trap_hiphop_btn.click(set_trap_hiphop_prompt, inputs=None, outputs=[instrumental_prompt])
     pop_rock_btn.click(set_pop_rock_prompt, inputs=None, outputs=[instrumental_prompt])
     fusion_jazz_btn.click(set_fusion_jazz_prompt, inputs=None, outputs=[instrumental_prompt])
+    edm_btn.click(set_edm_prompt, inputs=None, outputs=[instrumental_prompt])
+    indie_folk_btn.click(set_indie_folk_prompt, inputs=None, outputs=[instrumental_prompt])
     gen_btn.click(
         generate_music,
         inputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, crossfade_duration],
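Every genre button follows the same pattern: a gr.Button whose click handler is a zero-argument set_*_prompt function whose return value is written into the prompt textbox. The sketch below shows that pattern in a data-driven form; it is a hypothetical restructuring for illustration, not how app.py is organised, and it assumes the set_*_prompt functions defined earlier are in scope.

# Illustration only: the button-to-prompt wiring pattern, data-driven.
import gradio as gr

GENRES = [
    ("Classic Rock", set_classic_rock_prompt),  # assumes the set_*_prompt
    ("Deep House", set_deep_house_prompt),      # functions above are in scope
    ("EDM", set_edm_prompt),
]

with gr.Blocks() as demo:
    instrumental_prompt = gr.Textbox(label="Instrumental Prompt", elem_classes="textbox")
    with gr.Row(elem_classes="genre-buttons"):
        for label, fn in GENRES:
            btn = gr.Button(label, elem_classes="genre-btn")
            # each click replaces the textbox contents with that genre's preset prompt
            btn.click(fn, inputs=None, outputs=[instrumental_prompt])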