Update appcud12.py
Commit message: "bugs fixing cuda 12 reworking"

appcud12.py  CHANGED  (+178 −62)

[The page render truncated the diff's deleted (-) lines; only context, added (+) lines, and the few recoverable deletions are shown below.]
@@ -12,24 +12,42 @@ from torch.cuda.amp import autocast
 import warnings
 import random
 import traceback
+import logging
+from datetime import datetime
+from pathlib import Path
+import mmap

 # Suppress warnings for cleaner output
 warnings.filterwarnings("ignore")

 # Set PYTORCH_CUDA_ALLOC_CONF for CUDA 12
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:16"

 # Optimize for CUDA 12
 torch.backends.cudnn.benchmark = False
 torch.backends.cudnn.deterministic = True

+# Setup logging
+log_dir = "logs"
+os.makedirs(log_dir, exist_ok=True)
+log_file = os.path.join(log_dir, f"musicgen_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
+logging.basicConfig(
+    level=logging.DEBUG,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    handlers=[
+        logging.FileHandler(log_file),
+        logging.StreamHandler(sys.stdout)
+    ]
+)
+logger = logging.getLogger(__name__)
+
 # Device setup
 device = "cuda" if torch.cuda.is_available() else "cpu"
 if device != "cuda":
+    logger.error("CUDA is required for GPU rendering. CPU rendering is disabled.")
     sys.exit(1)
+logger.info(f"Using GPU: {torch.cuda.get_device_name(0)} (CUDA 12)")
+logger.info("Using precision: float16 for model, float32 for CPU processing")

 # Memory cleanup function
 def clean_memory():
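A note on the `PYTORCH_CUDA_ALLOC_CONF` line added above: the caching allocator reads the variable when the first CUDA allocation happens, so it must be set before any tensor touches the GPU. A minimal standalone sketch of the safe ordering (not part of the diff):

```python
# Minimal sketch: set the allocator config before torch makes its first
# CUDA allocation, otherwise the setting is silently ignored.
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:16"

import torch  # imported after the env var is in place

if torch.cuda.is_available():
    x = torch.zeros(1024, device="cuda")  # first allocation picks up the config
    print(f"{torch.cuda.memory_allocated() / 1024**2:.2f} MB allocated")
```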
@@ -37,27 +55,31 @@ def clean_memory():
     gc.collect()
     torch.cuda.ipc_collect()
     torch.cuda.synchronize()
+    vram_mb = torch.cuda.memory_allocated() / 1024**2
+    logger.info(f"Memory cleaned: VRAM allocated = {vram_mb:.2f} MB")
+    logger.debug(f"VRAM summary: {torch.cuda.memory_summary()}")
+    return vram_mb

 # Pre-run memory cleanup
 clean_memory()

 # Load MusicGen medium model into VRAM
 try:
+    logger.info("Loading MusicGen medium model into VRAM...")
     local_model_path = "./models/musicgen-medium"
     if not os.path.exists(local_model_path):
+        logger.error(f"Local model path {local_model_path} does not exist.")
+        logger.error("Please download the MusicGen medium model weights and place them in the correct directory.")
         sys.exit(1)
     musicgen_model = MusicGen.get_pretrained(local_model_path, device=device)
     musicgen_model.set_generation_params(
         duration=30,  # Strict 30s max per chunk
         two_step_cfg=False
     )
+    logger.info("MusicGen medium model loaded successfully.")
 except Exception as e:
+    logger.error(f"Failed to load MusicGen model: {e}")
+    logger.error(traceback.format_exc())
     sys.exit(1)

 # Check disk space
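For reference, a self-contained sketch of the audiocraft calls the loading block relies on (assumes the `audiocraft` package and a local `./models/musicgen-medium` checkout; the prompt string is illustrative):

```python
# Usage sketch of the MusicGen API as exercised by the loading block above.
from audiocraft.models import MusicGen

model = MusicGen.get_pretrained("./models/musicgen-medium", device="cuda")
model.set_generation_params(duration=30, two_step_cfg=False)

# generate() returns a batch of waveform tensors at the model's 32 kHz rate
wav = model.generate(["Instrumental grunge, raw distorted guitar riffs"], progress=True)[0].cpu()
print(wav.shape)  # (channels, samples)
```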
@@ -65,11 +87,12 @@ def check_disk_space(path="."):
     stat = os.statvfs(path)
     free_space = stat.f_bavail * stat.f_frsize / (1024**3)  # Free space in GB
     if free_space < 1.0:
+        logger.warning(f"Low disk space ({free_space:.2f} GB). Ensure at least 1 GB free.")
     return free_space >= 1.0

 # Audio processing functions (CPU-based)
-def balance_stereo(audio_segment, noise_threshold=-60, sample_rate=22050):
+def balance_stereo(audio_segment, noise_threshold=-60, sample_rate=16000):
+    logger.debug(f"Balancing stereo for segment with sample rate {sample_rate}")
     samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
     if audio_segment.channels == 2:
         stereo_samples = samples.reshape(-1, 2)
@@ -91,41 +114,54 @@ def balance_stereo(audio_segment, noise_threshold=-60, sample_rate=22050):
             sample_width=audio_segment.sample_width,
             channels=2
         )
+        logger.debug("Stereo balancing completed")
         return balanced_segment
+    logger.debug("Segment is not stereo, returning unchanged")
     return audio_segment

 def calculate_rms(segment):
     samples = np.array(segment.get_array_of_samples(), dtype=np.float32)
+    rms = np.sqrt(np.mean(samples**2))
+    logger.debug(f"Calculated RMS: {rms}")
+    return rms

+def rms_normalize(segment, target_rms_db=-23.0, peak_limit_db=-3.0, sample_rate=16000):
+    logger.debug(f"Normalizing RMS for segment with target {target_rms_db} dBFS")
     target_rms = 10 ** (target_rms_db / 20) * 32767
     current_rms = calculate_rms(segment)
     if current_rms > 0:
         gain_factor = target_rms / current_rms
         segment = segment.apply_gain(20 * np.log10(gain_factor))
     segment = hard_limit(segment, limit_db=peak_limit_db, sample_rate=sample_rate)
+    logger.debug("RMS normalization completed")
     return segment

+def hard_limit(audio_segment, limit_db=-3.0, sample_rate=16000):
+    logger.debug(f"Applying hard limit at {limit_db} dBFS")
     limit = 10 ** (limit_db / 20.0) * 32767
     samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
     samples = np.clip(samples, -limit, limit).astype(np.int16)
+    limited_segment = AudioSegment(
         samples.tobytes(),
         frame_rate=sample_rate,
         sample_width=audio_segment.sample_width,
         channels=audio_segment.channels
     )
+    logger.debug("Hard limit applied")
+    return limited_segment

+def apply_eq(segment, sample_rate=16000):
+    logger.debug(f"Applying EQ with sample rate {sample_rate}")
     segment = segment.high_pass_filter(20)
     segment = segment.low_pass_filter(20000)
+    logger.debug("EQ applied")
     return segment

 def apply_fade(segment, fade_in_duration=500, fade_out_duration=500):
+    logger.debug(f"Applying fade: in={fade_in_duration}ms, out={fade_out_duration}ms")
     segment = segment.fade_in(fade_in_duration)
     segment = segment.fade_out(fade_out_duration)
+    logger.debug("Fade applied")
     return segment

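The gain math in `rms_normalize` is easiest to sanity-check with numbers; a worked example (values are illustrative, not from the diff):

```python
# Worked example of the rms_normalize gain computation: a segment measured
# at -17 dBFS RMS, normalized to the -23 dBFS target, gets a -6 dB gain.
import numpy as np

target_rms = 10 ** (-23.0 / 20) * 32767   # linear target on the int16 scale
current_rms = 10 ** (-17.0 / 20) * 32767  # stand-in for calculate_rms(segment)
gain_db = 20 * np.log10(target_rms / current_rms)
print(round(gain_db, 1))  # -6.0
```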
 # Genre prompt functions
@@ -135,7 +171,9 @@ def set_red_hot_chili_peppers_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     synth = f", {synthesizer} accents" if synthesizer != "none" else ""
     bass = f", {bass_style}" if bass_style != "none" else ", groovy basslines"
     guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", syncopated guitar riffs"
+    prompt = f"Instrumental funk rock{bass}{guitar}{drum}{synth}, Red Hot Chili Peppers-inspired vibe with dynamic energy and funky breakdowns, {rhythm} at {bpm} BPM."
+    logger.debug(f"Generated RHCP prompt: {prompt}")
+    return prompt

 def set_nirvana_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("intense rhythmic steps" if bpm > 120 else "grungy rhythmic pulse")
@@ -143,7 +181,9 @@ def set_nirvana_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     synth = f", {synthesizer} accents" if synthesizer != "none" else ""
     bass = f", {bass_style}" if bass_style != "none" else ", melodic basslines"
     guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", raw distorted guitar riffs"
+    prompt = f"Instrumental grunge{bass}{guitar}{drum}{synth}, Nirvana-inspired angst-filled sound with quiet-loud dynamics, {rhythm} at {bpm} BPM."
+    logger.debug(f"Generated Nirvana prompt: {prompt}")
+    return prompt

 def set_pearl_jam_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("soulful rhythmic steps" if bpm > 120 else "driving rhythmic flow")
@@ -151,7 +191,9 @@ def set_pearl_jam_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     synth = f", {synthesizer} accents" if synthesizer != "none" else ""
     bass = f", {bass_style}" if bass_style != "none" else ", deep bass"
     guitar = f", {guitar_style} guitar leads" if guitar_style != "none" else ", soulful guitar leads"
+    prompt = f"Instrumental grunge{bass}{guitar}{drum}{synth}, Pearl Jam-inspired emotional intensity with soaring choruses, {rhythm} at {bpm} BPM."
+    logger.debug(f"Generated Pearl Jam prompt: {prompt}")
+    return prompt

 def set_soundgarden_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("heavy rhythmic steps" if bpm > 120 else "sludgy rhythmic groove")
@@ -159,7 +201,9 @@ def set_soundgarden_grunge_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     synth = f", {synthesizer} accents" if synthesizer != "none" else ""
     bass = f", {bass_style}" if bass_style != "none" else ""
     guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", heavy sludgy guitar riffs"
+    prompt = f"Instrumental grunge{bass}{guitar}{drum}{synth}, Soundgarden-inspired dark, psychedelic edge, {rhythm} at {bpm} BPM."
+    logger.debug(f"Generated Soundgarden prompt: {prompt}")
+    return prompt

 def set_foo_fighters_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     styles = ["anthemic", "gritty", "melodic", "fast-paced", "driving"]
@@ -173,7 +217,9 @@ def set_foo_fighters_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     synth = f", {synthesizer} accents" if synthesizer != "none" else ""
     bass = f", {bass_style}" if bass_style != "none" else ""
     guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else f", {style} guitar riffs"
+    prompt = f"Instrumental alternative rock{bass}{guitar}{drum}{synth}, Foo Fighters-inspired {mood} vibe with powerful choruses, {rhythm} at {bpm} BPM."
+    logger.debug(f"Generated Foo Fighters prompt: {prompt}")
+    return prompt

 def set_smashing_pumpkins_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("dynamic rhythmic steps" if bpm > 120 else "dreamy rhythmic flow")
@@ -181,7 +227,9 @@ def set_smashing_pumpkins_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     synth = f", {synthesizer} accents" if synthesizer != "none" else ""
     bass = f", {bass_style}" if bass_style != "none" else ""
     guitar = f", {guitar_style} guitar textures" if guitar_style != "none" else ", dreamy guitar textures"
+    prompt = f"Instrumental alternative rock{bass}{guitar}{drum}{synth}, Smashing Pumpkins-inspired blend of melancholy and aggression, {rhythm} at {bpm} BPM."
+    logger.debug(f"Generated Smashing Pumpkins prompt: {prompt}")
+    return prompt

 def set_radiohead_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("complex rhythmic steps" if bpm > 120 else "intricate rhythmic pulse")
@@ -189,7 +237,9 @@ def set_radiohead_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     synth = f", {synthesizer} accents" if synthesizer != "none" else ", atmospheric synths"
     bass = f", {bass_style}" if bass_style != "none" else ""
     guitar = f", {guitar_style} guitar layers" if guitar_style != "none" else ", intricate guitar layers"
+    prompt = f"Instrumental experimental rock{bass}{guitar}{drum}{synth}, Radiohead-inspired blend of introspective and innovative soundscapes, {rhythm} at {bpm} BPM."
+    logger.debug(f"Generated Radiohead prompt: {prompt}")
+    return prompt

 def set_classic_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("bluesy rhythmic steps" if bpm > 120 else "steady rhythmic groove")
@@ -197,7 +247,9 @@ def set_classic_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     synth = f", {synthesizer} accents" if synthesizer != "none" else ""
     bass = f", {bass_style}" if bass_style != "none" else ", groovy bass"
     guitar = f", {guitar_style} electric guitars" if guitar_style != "none" else ", bluesy electric guitars"
+    prompt = f"Instrumental classic rock{bass}{guitar}{drum}{synth}, Led Zeppelin-inspired raw energy with dynamic solos, {rhythm} at {bpm} BPM."
+    logger.debug(f"Generated Classic Rock prompt: {prompt}")
+    return prompt

 def set_alternative_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("quirky rhythmic steps" if bpm > 120 else "energetic rhythmic flow")
@@ -205,7 +257,9 @@ def set_alternative_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     synth = f", {synthesizer} accents" if synthesizer != "none" else ""
     bass = f", {bass_style}" if bass_style != "none" else ", melodic basslines"
     guitar = f", {guitar_style} guitar riffs" if guitar_style != "none" else ", distorted guitar riffs"
+    prompt = f"Instrumental alternative rock{bass}{guitar}{drum}{synth}, Pixies-inspired quirky, energetic vibe, {rhythm} at {bpm} BPM."
+    logger.debug(f"Generated Alternative Rock prompt: {prompt}")
+    return prompt

 def set_post_punk_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("sharp rhythmic steps" if bpm > 120 else "moody rhythmic pulse")
@@ -213,7 +267,9 @@ def set_post_punk_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     synth = f", {synthesizer} accents" if synthesizer != "none" else ""
     bass = f", {bass_style}" if bass_style != "none" else ", driving basslines"
     guitar = f", {guitar_style} guitars" if guitar_style != "none" else ", jangly guitars"
+    prompt = f"Instrumental post-punk{bass}{guitar}{drum}{synth}, Joy Division-inspired moody, atmospheric sound with a steady, hypnotic beat, {rhythm} at {bpm} BPM."
+    logger.debug(f"Generated Post-Punk prompt: {prompt}")
+    return prompt

 def set_indie_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("catchy rhythmic steps" if bpm > 120 else "jangly rhythmic flow")
@@ -221,7 +277,9 @@ def set_indie_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     synth = f", {synthesizer} accents" if synthesizer != "none" else ""
     bass = f", {bass_style}" if bass_style != "none" else ""
     guitar = f", {guitar_style} guitars" if guitar_style != "none" else ", jangly guitars"
+    prompt = f"Instrumental indie rock{bass}{guitar}{drum}{synth}, Arctic Monkeys-inspired blend of catchy riffs, {rhythm} at {bpm} BPM."
+    logger.debug(f"Generated Indie Rock prompt: {prompt}")
+    return prompt

 def set_funk_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("aggressive rhythmic steps" if bpm > 120 else "funky rhythmic groove")
@@ -229,7 +287,9 @@ def set_funk_rock_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     synth = f", {synthesizer} accents" if synthesizer != "none" else ""
     bass = f", {bass_style}" if bass_style != "none" else ", slap bass"
     guitar = f", {guitar_style} guitar chords" if guitar_style != "none" else ", funky guitar chords"
+    prompt = f"Instrumental funk rock{bass}{guitar}{drum}{synth}, Rage Against the Machine-inspired mix of groove and aggression, {rhythm} at {bpm} BPM."
+    logger.debug(f"Generated Funk Rock prompt: {prompt}")
+    return prompt

 def set_detroit_techno_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("pulsing rhythmic steps" if bpm > 120 else "deep rhythmic groove")
@@ -237,7 +297,9 @@ def set_detroit_techno_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     synth = f", {synthesizer} accents" if synthesizer != "none" else ", deep pulsing synths with a repetitive, hypnotic pattern"
     bass = f", {bass_style}" if bass_style != "none" else ", driving basslines with a consistent, groovy pulse"
     guitar = f", {guitar_style} guitars" if guitar_style != "none" else ""
+    prompt = f"Instrumental Detroit techno{bass}{guitar}{drum}{synth}, Juan Atkins-inspired rhythmic groove with a steady, repetitive beat, {rhythm} at {bpm} BPM."
+    logger.debug(f"Generated Detroit Techno prompt: {prompt}")
+    return prompt

 def set_deep_house_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     rhythm = f" with {rhythmic_steps}" if rhythmic_steps != "none" else ("soulful rhythmic steps" if bpm > 120 else "laid-back rhythmic flow")
@@ -245,31 +307,49 @@ def set_deep_house_prompt(bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style):
     synth = f", {synthesizer} accents" if synthesizer != "none" else ", warm analog synth chords with a repetitive, hypnotic progression"
     bass = f", {bass_style}" if bass_style != "none" else ", deep basslines with a consistent, groovy pulse"
     guitar = f", {guitar_style} guitars" if guitar_style != "none" else ""
+    prompt = f"Instrumental deep house{bass}{guitar}{drum}{synth}, Larry Heard-inspired laid-back groove with a steady, repetitive beat, {rhythm} at {bpm} BPM."
+    logger.debug(f"Generated Deep House prompt: {prompt}")
+    return prompt

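All of the builders above follow the same template; for a feel of the output, here is what the Nirvana builder produces with every style argument left at "none" (the `drum` assignment sits in elided context and is assumed to collapse to an empty string in that case):

```python
print(set_nirvana_grunge_prompt(126, "none", "none", "none", "none", "none"))
# Instrumental grunge, melodic basslines, raw distorted guitar riffs,
# Nirvana-inspired angst-filled sound with quiet-loud dynamics,
# intense rhythmic steps at 126 BPM.
```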
 # Preset configurations for genres (optimized for medium model)
 PRESETS = {
+    "default": {"cfg_scale": 2.0, "top_k": 150, "top_p": 0.9, "temperature": 0.8},
+    "rock": {"cfg_scale": 2.5, "top_k": 140, "top_p": 0.9, "temperature": 0.9},
+    "techno": {"cfg_scale": 1.8, "top_k": 160, "top_p": 0.85, "temperature": 0.7},
+    "grunge": {"cfg_scale": 2.0, "top_k": 150, "top_p": 0.9, "temperature": 0.85},
+    "indie": {"cfg_scale": 2.2, "top_k": 145, "top_p": 0.9, "temperature": 0.8}
 }

+# Function to get the latest log file
+def get_latest_log():
+    log_files = sorted(Path(log_dir).glob("musicgen_log_*.log"), key=os.path.getmtime, reverse=True)
+    if not log_files:
+        logger.warning("No log files found")
+        return "No log files found."
+    try:
+        with open(log_files[0], "r") as f:
+            content = f.read()
+        logger.info(f"Retrieved latest log file: {log_files[0]}")
+        return content
+    except Exception as e:
+        logger.error(f"Failed to read log file {log_files[0]}: {e}")
+        return f"Error reading log file: {e}"
+
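How `generate_music` consumes `PRESETS` is only partly visible below (the `preset_params` assignment sits in unchanged context). A hedged sketch of the likely lookup, with a fallback guard that is not in the diff:

```python
# Hypothetical guard around the preset lookup; the diff only shows the
# parameter unpacking, not the assignment itself.
preset_params = PRESETS.get(preset, PRESETS["default"])
cfg_scale = preset_params["cfg_scale"]  # e.g. 2.5 for the "rock" preset
top_k = preset_params["top_k"]
```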
 # Optimized generation function
 def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p: float, temperature: float, total_duration: int, bpm: int, drum_beat: str, synthesizer: str, rhythmic_steps: str, bass_style: str, guitar_style: str, target_volume: float, preset: str, vram_status: str):
     global musicgen_model
     if not instrumental_prompt.strip():
+        logger.warning("Empty instrumental prompt provided")
         return None, "⚠️ Please enter a valid instrumental prompt!", vram_status
     try:
+        logger.info("Starting music generation...")
         start_time = time.time()
         max_duration = 30  # Strict 30s max per chunk
         total_duration = min(max(total_duration, 30), 120)  # Clamp between 30s and 120s
+        processing_sample_rate = 16000  # Lower for processing
         output_sample_rate = 32000  # MusicGen's native rate
         audio_segments = []
+        overlap_duration = 0.3  # 300ms for continuation and crossfade
         remaining_duration = total_duration

         if preset != "default":
@@ -278,11 +358,13 @@ def generate_music(...):
             top_k = preset_params["top_k"]
             top_p = preset_params["top_p"]
             temperature = preset_params["temperature"]
+            logger.info(f"Applied preset {preset}: cfg_scale={cfg_scale}, top_k={top_k}, top_p={top_p}, temperature={temperature}")

         if not check_disk_space():
+            logger.error("Insufficient disk space")
             return None, "⚠️ Insufficient disk space. Free up at least 1 GB.", vram_status

+        logger.info(f"Generating audio for {total_duration}s with seed=42")
         seed = 42
         base_prompt = instrumental_prompt
         clean_memory()
@@ -291,8 +373,9 @@ def generate_music(...):
         while remaining_duration > 0:
             current_duration = min(max_duration, remaining_duration)
             generation_duration = current_duration  # No overlap in generation
-            print(f"Generating chunk ({current_duration}s, VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB)...")
+            chunk_num = len(audio_segments) + 1
+            logger.info(f"Generating chunk {chunk_num} ({current_duration}s, VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB)")

             musicgen_model.set_generation_params(
                 duration=generation_duration,
                 use_sampling=True,
@@ -308,18 +391,28 @@ def generate_music(...):
                 torch.manual_seed(seed)
                 np.random.seed(seed)
                 torch.cuda.manual_seed_all(seed)
+                clean_memory()  # Pre-generation cleanup
                 if not audio_segments:
+                    logger.debug("Generating first chunk")
                     audio_segment = musicgen_model.generate([base_prompt], progress=True)[0].cpu()
                 else:
+                    logger.debug("Generating continuation chunk")
                     prev_segment = audio_segments[-1]
                     prev_segment = balance_stereo(prev_segment, noise_threshold=-60, sample_rate=processing_sample_rate)
                     temp_wav_path = f"temp_prev_{int(time.time()*1000)}.wav"
+                    logger.debug(f"Exporting previous segment to {temp_wav_path}")
                     prev_segment.export(temp_wav_path, format="wav")
+                    # Memory-map the file read-only (note: the handle is closed
+                    # immediately and torchaudio.load still reads from the path)
+                    with open(temp_wav_path, "rb") as f:
+                        mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
+                        prev_audio, prev_sr = torchaudio.load(temp_wav_path)
+                        mmapped_file.close()
                     if prev_sr != processing_sample_rate:
+                        logger.debug(f"Resampling from {prev_sr} to {processing_sample_rate}")
                         prev_audio = torchaudio.transforms.Resample(prev_sr, processing_sample_rate)(prev_audio)
                     prev_audio = prev_audio.to(device)
                     os.remove(temp_wav_path)
+                    logger.debug(f"Deleted temporary file {temp_wav_path}")
                     audio_segment = musicgen_model.generate_continuation(
                         prompt=prev_audio[:, -int(processing_sample_rate * overlap_duration):],
                         prompt_sample_rate=processing_sample_rate,
@@ -329,24 +422,32 @@ def generate_music(...):
                     del prev_audio
                     clean_memory()
             except Exception as e:
+                logger.error(f"Error in chunk {chunk_num} generation: {e}")
+                logger.error(traceback.format_exc())
                 raise e

+            logger.debug(f"Generated audio segment shape: {audio_segment.shape}")
             audio_segment = audio_segment.to(dtype=torch.float32)
             if audio_segment.dim() == 1:
+                logger.debug("Converting mono to stereo")
                 audio_segment = torch.stack([audio_segment, audio_segment], dim=0)
             elif audio_segment.dim() == 2 and audio_segment.shape[0] != 2:
+                logger.debug("Adjusting to stereo")
                 audio_segment = torch.cat([audio_segment, audio_segment], dim=0)

             if audio_segment.shape[0] != 2:
+                logger.error(f"Expected stereo audio with shape (2, samples), got shape {audio_segment.shape}")
                 raise ValueError(f"Expected stereo audio with shape (2, samples), got shape {audio_segment.shape}")

             temp_wav_path = f"temp_audio_{int(time.time()*1000)}.wav"
+            logger.debug(f"Saving audio segment to {temp_wav_path}")
             torchaudio.save(temp_wav_path, audio_segment, output_sample_rate, bits_per_sample=16)
             segment = AudioSegment.from_wav(temp_wav_path)
             os.remove(temp_wav_path)
+            logger.debug(f"Deleted temporary file {temp_wav_path}")
             segment = segment - 15
             if segment.frame_rate != processing_sample_rate:
+                logger.debug(f"Setting segment sample rate to {processing_sample_rate}")
                 segment = segment.set_frame_rate(processing_sample_rate)
             segment = balance_stereo(segment, noise_threshold=-60, sample_rate=processing_sample_rate)
             segment = rms_normalize(segment, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=processing_sample_rate)

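About the memory-mapped read in the continuation branch above: the mmap handle is opened and closed without being passed to `torchaudio.load`, which still reads from the path. If the intent is to actually read through the map, a file-like object works; a sketch, assuming a torchaudio backend (e.g. soundfile) that accepts file objects:

```python
import io
import mmap
import torchaudio

with open(temp_wav_path, "rb") as f:
    mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
    # Wrap the mapped bytes so torchaudio reads from memory, not the path
    prev_audio, prev_sr = torchaudio.load(io.BytesIO(mm[:]))
    mm.close()
```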
@@ -355,11 +456,11 @@ def generate_music(...):

             del audio_segment
             clean_memory()
-            vram_status = f"VRAM after chunk: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
+            vram_status = f"VRAM after chunk {chunk_num}: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
             time.sleep(0.1)
             remaining_duration -= current_duration

+        logger.info("Combining audio chunks...")
         final_segment = audio_segments[0][:min(max_duration, total_duration) * 1000]
         overlap_ms = int(overlap_duration * 1000)

@@ -368,6 +469,7 @@ def generate_music(...):
             current_segment = current_segment[:min(max_duration, total_duration - (i * max_duration)) * 1000]

             if overlap_ms > 0 and len(current_segment) > overlap_ms:
+                logger.debug(f"Applying crossfade between chunks {i} and {i+1}")
                 prev_overlap = final_segment[-overlap_ms:]
                 curr_overlap = current_segment[:overlap_ms]
                 num_samples = len(np.array(prev_overlap.get_array_of_samples(), dtype=np.float32)) // 2
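The actual sample blending (new lines 476-488) is unchanged context and not shown in the diff; a linear crossfade over the overlap would look roughly like this (a sketch, not the commit's code):

```python
import numpy as np

def linear_blend(prev: np.ndarray, curr: np.ndarray) -> np.ndarray:
    """Linearly crossfade two equal-length float32 overlap buffers."""
    t = np.linspace(0.0, 1.0, len(prev), dtype=np.float32)
    return prev * (1.0 - t) + curr * t
```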
@@ -387,10 +489,11 @@ def generate_music(...):
                 blended_segment = rms_normalize(blended_segment, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=processing_sample_rate)
                 final_segment = final_segment[:-overlap_ms] + blended_segment + current_segment[overlap_ms:]
             else:
+                logger.debug(f"Concatenating chunk {i+1} without crossfade")
                 final_segment += current_segment

         final_segment = final_segment[:total_duration * 1000]
+        logger.info("Post-processing final track...")
         final_segment = rms_normalize(final_segment, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=processing_sample_rate)
         final_segment = apply_eq(final_segment, sample_rate=processing_sample_rate)
         final_segment = apply_fade(final_segment)
@@ -399,39 +502,42 @@ def generate_music(...):
         final_segment = final_segment.set_frame_rate(output_sample_rate)  # Upsample to output rate

         mp3_path = f"output_adjusted_volume_{int(time.time())}.mp3"
+        logger.info("⚠️ WARNING: Audio is set to safe levels (~ -23 dBFS RMS, -3 dBFS peak). Start playback at LOW volume (10-20%) and adjust gradually.")
+        logger.info("VERIFY: Open the file in Audacity to check for static. RMS should be ~ -23 dBFS, peaks ≤ -3 dBFS. Report any static or issues.")
         try:
+            logger.debug(f"Exporting final audio to {mp3_path}")
             final_segment.export(
                 mp3_path,
                 format="mp3",
                 bitrate="96k",
                 tags={"title": "GhostAI Instrumental", "artist": "GhostAI"}
             )
+            logger.info(f"Final audio saved to {mp3_path}")
         except Exception as e:
+            logger.error(f"Error exporting MP3: {e}")
             fallback_path = f"fallback_output_{int(time.time())}.mp3"
             try:
                 final_segment.export(fallback_path, format="mp3", bitrate="96k")
+                logger.info(f"Final audio saved to fallback: {fallback_path}")
                 mp3_path = fallback_path
             except Exception as fallback_e:
+                logger.error(f"Failed to save fallback MP3: {fallback_e}")
                 raise e

         vram_status = f"Final VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
+        logger.info(f"Generation completed in {time.time() - start_time:.2f} seconds")
         return mp3_path, "✅ Done! Generated static-free track with adjusted volume levels.", vram_status
     except Exception as e:
+        logger.error(f"Generation failed: {e}")
+        logger.error(traceback.format_exc())
         return None, f"❌ Generation failed: {e}", vram_status
     finally:
         clean_memory()

 # Clear inputs function
 def clear_inputs():
+    logger.info("Clearing input fields")
+    return "", 2.0, 150, 0.9, 0.8, 30, 120, "none", "none", "none", "none", "none", -23.0, "default", ""

 # Custom CSS
 css = """
@@ -459,7 +565,7 @@ p {
     color: #E0E0E0;
     font-size: 12px;
 }
-.input-container, .settings-container, .output-container {
+.input-container, .settings-container, .output-container, .logs-container {
     max-width: 1200px;
     margin: 20px auto;
     padding: 20px;
@@ -510,7 +616,7 @@ p {
 """

 # Build Gradio interface
+logger.info("Building Gradio interface...")
 with gr.Blocks(css=css) as demo:
     gr.Markdown("""
     <div class="header-container">
@@ -551,7 +657,7 @@ with gr.Blocks(css=css) as demo:
                 label="CFG Scale",
                 minimum=1.0,
                 maximum=10.0,
+                value=2.0,
                 step=0.1,
                 info="Controls how closely the music follows the prompt."
             )
@@ -559,7 +665,7 @@ with gr.Blocks(css=css) as demo:
                 label="Top-K Sampling",
                 minimum=10,
                 maximum=500,
+                value=150,
                 step=10,
                 info="Limits sampling to the top k most likely tokens."
             )
@@ -648,6 +754,11 @@ with gr.Blocks(css=css) as demo:
         status = gr.Textbox(label="Status", interactive=False)
         vram_status = gr.Textbox(label="VRAM Usage", interactive=False, value="")

+    with gr.Column(elem_classes="logs-container"):
+        gr.Markdown("### Logs")
+        log_output = gr.Textbox(label="Last Log File Contents", lines=20, interactive=False)
+        log_btn = gr.Button("View Last Log")
+
     rhcp_btn.click(set_red_hot_chili_peppers_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style], outputs=instrumental_prompt)
     nirvana_btn.click(set_nirvana_grunge_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style], outputs=instrumental_prompt)
     pearl_jam_btn.click(set_pearl_jam_grunge_prompt, inputs=[bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style], outputs=instrumental_prompt)
@@ -672,9 +783,14 @@ with gr.Blocks(css=css) as demo:
         inputs=None,
         outputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume, preset, vram_status]
     )
+    log_btn.click(
+        get_latest_log,
+        inputs=None,
+        outputs=log_output
+    )

 # Launch locally without OpenAPI/docs
+logger.info("Launching Gradio UI at http://localhost:9999...")
 app = demo.launch(
     server_name="0.0.0.0",
     server_port=9999,
@@ -687,5 +803,5 @@ try:
     fastapi_app.docs_url = None
     fastapi_app.redoc_url = None
     fastapi_app.openapi_url = None
-except Exception:
+except Exception as e:
+    logger.error(f"Failed to configure FastAPI app: {e}")