Update app.py
Browse files — commit message: "Music best practices applied to code"
app.py
CHANGED
|
@@ -78,14 +78,14 @@ def set_deep_house_prompt():
|
|
| 78 |
# 5) AUDIO PROCESSING FUNCTIONS
|
| 79 |
def apply_chorus(segment):
|
| 80 |
# Enhanced chorus effect for richer sound
|
| 81 |
-
delayed = segment -
|
| 82 |
delayed = delayed.set_frame_rate(segment.frame_rate)
|
| 83 |
return segment.overlay(delayed, position=20)
|
| 84 |
|
| 85 |
def apply_eq(segment):
|
| 86 |
# Adjusted EQ for a more balanced sound
|
| 87 |
-
segment = segment.low_pass_filter(
|
| 88 |
-
segment = segment.high_pass_filter(
|
| 89 |
return segment
|
| 90 |
|
| 91 |
def apply_limiter(segment, max_db=-3.0):
|
|
@@ -94,6 +94,17 @@ def apply_limiter(segment, max_db=-3.0):
|
|
| 94 |
segment = segment - (segment.dBFS - max_db)
|
| 95 |
return segment
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
# 6) GENERATION & I/O FUNCTIONS
|
| 98 |
def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p: float, temperature: float, total_duration: int, crossfade_duration: int):
|
| 99 |
global musicgen_model
|
|
@@ -105,7 +116,8 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
| 105 |
# Ensure total duration is within reasonable bounds (up to 90 seconds for longer tracks)
|
| 106 |
total_duration = min(max(total_duration, 10), 90)
|
| 107 |
chunk_duration = 15
|
| 108 |
-
|
|
|
|
| 109 |
chunk_duration = total_duration / num_chunks
|
| 110 |
|
| 111 |
# Generate slightly longer chunks for overlap
|
|
@@ -115,10 +127,13 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
| 115 |
audio_chunks = []
|
| 116 |
sample_rate = musicgen_model.sample_rate
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
# Generate audio in chunks with a consistent prompt
|
| 119 |
for i in range(num_chunks):
|
| 120 |
chunk_prompt = instrumental_prompt # Use the same prompt for all chunks
|
| 121 |
-
|
| 122 |
print(f"Generating chunk {i+1}/{num_chunks} on GPU (prompt: {chunk_prompt})...")
|
| 123 |
musicgen_model.set_generation_params(
|
| 124 |
duration=generation_duration,
|
|
@@ -153,6 +168,9 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
| 153 |
chunk_path = f"chunk_{i}.mp3"
|
| 154 |
torchaudio.save(temp_wav_path, audio_chunk, sample_rate, bits_per_sample=24)
|
| 155 |
segment = AudioSegment.from_wav(temp_wav_path)
|
|
|
|
|
|
|
|
|
|
| 156 |
segment.export(chunk_path, format="mp3", bitrate="320k")
|
| 157 |
os.remove(temp_wav_path)
|
| 158 |
audio_chunks.append(chunk_path)
|
|
@@ -167,7 +185,8 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
| 167 |
final_segment = AudioSegment.from_mp3(audio_chunks[0])
|
| 168 |
for i in range(1, len(audio_chunks)):
|
| 169 |
next_segment = AudioSegment.from_mp3(audio_chunks[i])
|
| 170 |
-
|
|
|
|
| 171 |
final_segment = final_segment.append(next_segment, crossfade=crossfade_duration)
|
| 172 |
|
| 173 |
# Trim to exact total duration
|
|
@@ -175,10 +194,11 @@ def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p
|
|
| 175 |
|
| 176 |
# Post-process with improved dynamics
|
| 177 |
print("Post-processing final track...")
|
| 178 |
-
final_segment = apply_eq(final_segment)
|
| 179 |
final_segment = apply_chorus(final_segment)
|
| 180 |
-
final_segment = apply_limiter(final_segment, max_db=-3.0)
|
| 181 |
-
final_segment = final_segment
|
|
|
|
|
|
|
| 182 |
|
| 183 |
mp3_path = "output_cleaned.mp3"
|
| 184 |
final_segment.export(
|
|
|
|
| 78 |
# 5) AUDIO PROCESSING FUNCTIONS
|
| 79 |
def apply_chorus(segment):
    """Thicken the sound by overlaying a quieter copy of the track 20 ms late.

    The duplicate is dropped by 8 dB so the doubling stays subtle and does
    not muddy the mix; its frame rate is pinned to the source's so the two
    layers stay aligned. Expects a pydub ``AudioSegment``-like object —
    TODO confirm against caller.
    """
    # Build the -8 dB delayed copy inline and mix it 20 ms behind the original.
    return segment.overlay(
        (segment - 8).set_frame_rate(segment.frame_rate),
        position=20,
    )
|
| 84 |
|
| 85 |
def apply_eq(segment):
    """Balance the spectrum of *segment* with a simple band-limit EQ.

    Rolls off everything above 7 kHz to soften harsh highs, then everything
    below 100 Hz to trim low-end rumble. Expects a pydub
    ``AudioSegment``-like object — TODO confirm against caller.
    """
    # Low-pass first, then high-pass; each call returns a new filtered segment.
    return segment.low_pass_filter(7000).high_pass_filter(100)
|
| 90 |
|
| 91 |
def apply_limiter(segment, max_db=-3.0):
    """Cap the segment's average level at *max_db* dBFS.

    A limiter must only attenuate: the original implementation applied
    ``segment - (segment.dBFS - max_db)`` unconditionally, which *boosted*
    any material quieter than the ceiling, and produced an infinite gain
    change for digital silence (whose ``dBFS`` is ``-inf``).

    Args:
        segment: pydub ``AudioSegment``-like object exposing ``dBFS`` and
            gain adjustment via ``-`` (presumably pydub — TODO confirm).
        max_db: ceiling in dBFS; the -3.0 default leaves encoding headroom.

    Returns:
        The segment, attenuated down to *max_db* only if it was louder.
    """
    level = segment.dBFS
    # Silence reports -inf dBFS; subtracting it would yield an infinite
    # gain, so skip it along with anything already at or under the ceiling.
    if level == float("-inf") or level <= max_db:
        return segment
    return segment - (level - max_db)
|
| 96 |
|
| 97 |
+
def apply_final_gain(segment, target_db=-12.0):
    """Adjust the segment to a consistent final loudness of *target_db* dBFS.

    Fix over the original: digital silence measures ``-inf`` dBFS, so the
    required gain (``target_db - dBFS``) would be infinite; silent audio is
    now returned unchanged instead.

    Args:
        segment: pydub ``AudioSegment``-like object exposing ``dBFS`` and
            gain adjustment via ``+`` (presumably pydub — TODO confirm).
        target_db: desired average level in dBFS.

    Returns:
        The segment shifted (up or down) to *target_db*, or unchanged if silent.
    """
    current = segment.dBFS
    # Guard the -inf case: no finite gain can bring silence to the target.
    if current == float("-inf"):
        return segment
    return segment + (target_db - current)
|
| 101 |
+
|
| 102 |
+
def apply_fade(segment, fade_in_duration=2000, fade_out_duration=2000):
    """Ramp the segment's volume in at the start and out at the end.

    Args:
        segment: pydub ``AudioSegment``-like object — TODO confirm.
        fade_in_duration: fade-in length in milliseconds.
        fade_out_duration: fade-out length in milliseconds.

    Returns:
        The faded segment.
    """
    # Each fade call returns a new segment, so the two can be chained.
    return segment.fade_in(fade_in_duration).fade_out(fade_out_duration)
|
| 107 |
+
|
| 108 |
# 6) GENERATION & I/O FUNCTIONS
|
| 109 |
def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p: float, temperature: float, total_duration: int, crossfade_duration: int):
|
| 110 |
global musicgen_model
|
|
|
|
| 116 |
# Ensure total duration is within reasonable bounds (up to 90 seconds for longer tracks)
|
| 117 |
total_duration = min(max(total_duration, 10), 90)
|
| 118 |
chunk_duration = 15
|
| 119 |
+
# Use 2 chunks for durations up to 30 seconds, 3 chunks for longer durations
|
| 120 |
+
num_chunks = 2 if total_duration <= 30 else 3
|
| 121 |
chunk_duration = total_duration / num_chunks
|
| 122 |
|
| 123 |
# Generate slightly longer chunks for overlap
|
|
|
|
| 127 |
audio_chunks = []
|
| 128 |
sample_rate = musicgen_model.sample_rate
|
| 129 |
|
| 130 |
+
# Set a fixed seed for consistent generation across chunks
|
| 131 |
+
torch.manual_seed(42) # Fixed seed for reproducibility
|
| 132 |
+
np.random.seed(42)
|
| 133 |
+
|
| 134 |
# Generate audio in chunks with a consistent prompt
|
| 135 |
for i in range(num_chunks):
|
| 136 |
chunk_prompt = instrumental_prompt # Use the same prompt for all chunks
|
|
|
|
| 137 |
print(f"Generating chunk {i+1}/{num_chunks} on GPU (prompt: {chunk_prompt})...")
|
| 138 |
musicgen_model.set_generation_params(
|
| 139 |
duration=generation_duration,
|
|
|
|
| 168 |
chunk_path = f"chunk_{i}.mp3"
|
| 169 |
torchaudio.save(temp_wav_path, audio_chunk, sample_rate, bits_per_sample=24)
|
| 170 |
segment = AudioSegment.from_wav(temp_wav_path)
|
| 171 |
+
# Apply EQ and limiter to each chunk to maintain consistent dynamics
|
| 172 |
+
segment = apply_eq(segment)
|
| 173 |
+
segment = apply_limiter(segment, max_db=-3.0)
|
| 174 |
segment.export(chunk_path, format="mp3", bitrate="320k")
|
| 175 |
os.remove(temp_wav_path)
|
| 176 |
audio_chunks.append(chunk_path)
|
|
|
|
| 185 |
final_segment = AudioSegment.from_mp3(audio_chunks[0])
|
| 186 |
for i in range(1, len(audio_chunks)):
|
| 187 |
next_segment = AudioSegment.from_mp3(audio_chunks[i])
|
| 188 |
+
# Apply a smaller gain boost before crossfading to avoid volume spikes
|
| 189 |
+
next_segment = next_segment - 2 # Reduce by 2 dB to soften transitions
|
| 190 |
final_segment = final_segment.append(next_segment, crossfade=crossfade_duration)
|
| 191 |
|
| 192 |
# Trim to exact total duration
|
|
|
|
| 194 |
|
| 195 |
# Post-process with improved dynamics
|
| 196 |
print("Post-processing final track...")
|
|
|
|
| 197 |
final_segment = apply_chorus(final_segment)
|
| 198 |
+
final_segment = apply_limiter(final_segment, max_db=-3.0)
|
| 199 |
+
final_segment = apply_fade(final_segment, fade_in_duration=2000, fade_out_duration=2000)
|
| 200 |
+
final_segment = final_segment.normalize(headroom=-6.0)
|
| 201 |
+
final_segment = apply_final_gain(final_segment, target_db=-12.0)
|
| 202 |
|
| 203 |
mp3_path = "output_cleaned.mp3"
|
| 204 |
final_segment.export(
|