Update app.py
Browse files — commit message: "Music best practices applied to code"
app.py
CHANGED
|
@@ -78,14 +78,14 @@ def set_deep_house_prompt():
|
|
| 78 |
# 5) AUDIO PROCESSING FUNCTIONS
|
| 79 |
def apply_chorus(segment):
|
| 80 |
# Enhanced chorus effect for richer sound
|
| 81 |
-
delayed = segment -
|
| 82 |
delayed = delayed.set_frame_rate(segment.frame_rate)
|
| 83 |
return segment.overlay(delayed, position=20)
|
| 84 |
|
| 85 |
def apply_eq(segment):
|
| 86 |
# Adjusted EQ for a more balanced sound
|
| 87 |
-
segment = segment.low_pass_filter(
|
| 88 |
-
segment = segment.high_pass_filter(
|
| 89 |
return segment
|
| 90 |
|
| 91 |
def apply_limiter(segment, max_db=-3.0):
|
|
@@ -94,6 +94,17 @@ def apply_limiter(segment, max_db=-3.0):
|
|
| 94 |
segment = segment - (segment.dBFS - max_db)
|
| 95 |
return segment
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
# 6) GENERATION & I/O FUNCTIONS
|
| 98 |
def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p: float, temperature: float, total_duration: int, crossfade_duration: int):
|
| 99 |
global musicgen_model
|
|
@@ -105,7 +116,8 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
| 105 |
# Ensure total duration is within reasonable bounds (up to 90 seconds for longer tracks)
|
| 106 |
total_duration = min(max(total_duration, 10), 90)
|
| 107 |
chunk_duration = 15
|
| 108 |
-
|
|
|
|
| 109 |
chunk_duration = total_duration / num_chunks
|
| 110 |
|
| 111 |
# Generate slightly longer chunks for overlap
|
|
@@ -115,10 +127,13 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
| 115 |
audio_chunks = []
|
| 116 |
sample_rate = musicgen_model.sample_rate
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
# Generate audio in chunks with a consistent prompt
|
| 119 |
for i in range(num_chunks):
|
| 120 |
chunk_prompt = instrumental_prompt # Use the same prompt for all chunks
|
| 121 |
-
|
| 122 |
print(f"Generating chunk {i+1}/{num_chunks} on GPU (prompt: {chunk_prompt})...")
|
| 123 |
musicgen_model.set_generation_params(
|
| 124 |
duration=generation_duration,
|
|
@@ -153,6 +168,9 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
| 153 |
chunk_path = f"chunk_{i}.mp3"
|
| 154 |
torchaudio.save(temp_wav_path, audio_chunk, sample_rate, bits_per_sample=24)
|
| 155 |
segment = AudioSegment.from_wav(temp_wav_path)
|
|
|
|
|
|
|
|
|
|
| 156 |
segment.export(chunk_path, format="mp3", bitrate="320k")
|
| 157 |
os.remove(temp_wav_path)
|
| 158 |
audio_chunks.append(chunk_path)
|
|
@@ -167,7 +185,8 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
| 167 |
final_segment = AudioSegment.from_mp3(audio_chunks[0])
|
| 168 |
for i in range(1, len(audio_chunks)):
|
| 169 |
next_segment = AudioSegment.from_mp3(audio_chunks[i])
|
| 170 |
-
|
|
|
|
| 171 |
final_segment = final_segment.append(next_segment, crossfade=crossfade_duration)
|
| 172 |
|
| 173 |
# Trim to exact total duration
|
|
@@ -175,10 +194,11 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
| 175 |
|
| 176 |
# Post-process with improved dynamics
|
| 177 |
print("Post-processing final track...")
|
| 178 |
-
final_segment = apply_eq(final_segment)
|
| 179 |
final_segment = apply_chorus(final_segment)
|
| 180 |
-
final_segment = apply_limiter(final_segment, max_db=-3.0)
|
| 181 |
-
final_segment = final_segment
|
|
|
|
|
|
|
| 182 |
|
| 183 |
mp3_path = "output_cleaned.mp3"
|
| 184 |
final_segment.export(
|
|
|
|
| 78 |
# 5) AUDIO PROCESSING FUNCTIONS
|
| 79 |
def apply_chorus(segment):
    """Thicken the sound by overlaying a quieter copy of the track 20 ms late.

    The duplicate is dropped by 8 dB so the doubling stays subtle and does
    not muddy the mix; its frame rate is pinned to the source's so the two
    layers stay aligned. Expects a pydub ``AudioSegment``-like object —
    TODO confirm against caller.
    """
    # Build the -8 dB delayed copy inline and mix it 20 ms behind the original.
    return segment.overlay(
        (segment - 8).set_frame_rate(segment.frame_rate),
        position=20,
    )
|
| 84 |
|
| 85 |
def apply_eq(segment):
    """Balance the spectrum of *segment* with a simple band-limit EQ.

    Rolls off everything above 7 kHz to soften harsh highs, then everything
    below 100 Hz to trim low-end rumble. Expects a pydub
    ``AudioSegment``-like object — TODO confirm against caller.
    """
    # Low-pass first, then high-pass; each call returns a new filtered segment.
    return segment.low_pass_filter(7000).high_pass_filter(100)
|
| 90 |
|
| 91 |
def apply_limiter(segment, max_db=-3.0):
    """Cap the segment's average level at *max_db* dBFS.

    A limiter must only attenuate: the original implementation applied
    ``segment - (segment.dBFS - max_db)`` unconditionally, which *boosted*
    any material quieter than the ceiling, and produced an infinite gain
    change for digital silence (whose ``dBFS`` is ``-inf``).

    Args:
        segment: pydub ``AudioSegment``-like object exposing ``dBFS`` and
            gain adjustment via ``-`` (presumably pydub — TODO confirm).
        max_db: ceiling in dBFS; the -3.0 default leaves encoding headroom.

    Returns:
        The segment, attenuated down to *max_db* only if it was louder.
    """
    level = segment.dBFS
    # Silence reports -inf dBFS; subtracting it would yield an infinite
    # gain, so skip it along with anything already at or under the ceiling.
    if level == float("-inf") or level <= max_db:
        return segment
    return segment - (level - max_db)
|
| 96 |
|
| 97 |
+
def apply_final_gain(segment, target_db=-12.0):
    """Adjust the segment to a consistent final loudness of *target_db* dBFS.

    Fix over the original: digital silence measures ``-inf`` dBFS, so the
    required gain (``target_db - dBFS``) would be infinite; silent audio is
    now returned unchanged instead.

    Args:
        segment: pydub ``AudioSegment``-like object exposing ``dBFS`` and
            gain adjustment via ``+`` (presumably pydub — TODO confirm).
        target_db: desired average level in dBFS.

    Returns:
        The segment shifted (up or down) to *target_db*, or unchanged if silent.
    """
    current = segment.dBFS
    # Guard the -inf case: no finite gain can bring silence to the target.
    if current == float("-inf"):
        return segment
    return segment + (target_db - current)
|
| 101 |
+
|
| 102 |
+
def apply_fade(segment, fade_in_duration=2000, fade_out_duration=2000):
    """Ramp the segment's volume in at the start and out at the end.

    Args:
        segment: pydub ``AudioSegment``-like object — TODO confirm.
        fade_in_duration: fade-in length in milliseconds.
        fade_out_duration: fade-out length in milliseconds.

    Returns:
        The faded segment.
    """
    # Each fade call returns a new segment, so the two can be chained.
    return segment.fade_in(fade_in_duration).fade_out(fade_out_duration)
|
| 107 |
+
|
| 108 |
# 6) GENERATION & I/O FUNCTIONS
|
| 109 |
def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p: float, temperature: float, total_duration: int, crossfade_duration: int):
|
| 110 |
global musicgen_model
|
|
|
|
| 116 |
# Ensure total duration is within reasonable bounds (up to 90 seconds for longer tracks)
|
| 117 |
total_duration = min(max(total_duration, 10), 90)
|
| 118 |
chunk_duration = 15
|
| 119 |
+
# Use 2 chunks for durations up to 30 seconds, 3 chunks for longer durations
|
| 120 |
+
num_chunks = 2 if total_duration <= 30 else 3
|
| 121 |
chunk_duration = total_duration / num_chunks
|
| 122 |
|
| 123 |
# Generate slightly longer chunks for overlap
|
|
|
|
| 127 |
audio_chunks = []
|
| 128 |
sample_rate = musicgen_model.sample_rate
|
| 129 |
|
| 130 |
+
# Set a fixed seed for consistent generation across chunks
|
| 131 |
+
torch.manual_seed(42) # Fixed seed for reproducibility
|
| 132 |
+
np.random.seed(42)
|
| 133 |
+
|
| 134 |
# Generate audio in chunks with a consistent prompt
|
| 135 |
for i in range(num_chunks):
|
| 136 |
chunk_prompt = instrumental_prompt # Use the same prompt for all chunks
|
|
|
|
| 137 |
print(f"Generating chunk {i+1}/{num_chunks} on GPU (prompt: {chunk_prompt})...")
|
| 138 |
musicgen_model.set_generation_params(
|
| 139 |
duration=generation_duration,
|
|
|
|
| 168 |
chunk_path = f"chunk_{i}.mp3"
|
| 169 |
torchaudio.save(temp_wav_path, audio_chunk, sample_rate, bits_per_sample=24)
|
| 170 |
segment = AudioSegment.from_wav(temp_wav_path)
|
| 171 |
+
# Apply EQ and limiter to each chunk to maintain consistent dynamics
|
| 172 |
+
segment = apply_eq(segment)
|
| 173 |
+
segment = apply_limiter(segment, max_db=-3.0)
|
| 174 |
segment.export(chunk_path, format="mp3", bitrate="320k")
|
| 175 |
os.remove(temp_wav_path)
|
| 176 |
audio_chunks.append(chunk_path)
|
|
|
|
| 185 |
final_segment = AudioSegment.from_mp3(audio_chunks[0])
|
| 186 |
for i in range(1, len(audio_chunks)):
|
| 187 |
next_segment = AudioSegment.from_mp3(audio_chunks[i])
|
| 188 |
+
# Apply a smaller gain boost before crossfading to avoid volume spikes
|
| 189 |
+
next_segment = next_segment - 2 # Reduce by 2 dB to soften transitions
|
| 190 |
final_segment = final_segment.append(next_segment, crossfade=crossfade_duration)
|
| 191 |
|
| 192 |
# Trim to exact total duration
|
|
|
|
| 194 |
|
| 195 |
# Post-process with improved dynamics
|
| 196 |
print("Post-processing final track...")
|
|
|
|
| 197 |
final_segment = apply_chorus(final_segment)
|
| 198 |
+
final_segment = apply_limiter(final_segment, max_db=-3.0)
|
| 199 |
+
final_segment = apply_fade(final_segment, fade_in_duration=2000, fade_out_duration=2000)
|
| 200 |
+
final_segment = final_segment.normalize(headroom=-6.0)
|
| 201 |
+
final_segment = apply_final_gain(final_segment, target_db=-12.0)
|
| 202 |
|
| 203 |
mp3_path = "output_cleaned.mp3"
|
| 204 |
final_segment.export(
|