ghostai1
/

GHOSTSONAFB

English

python

Model card Files Files and versions

xet

Community

ghostai1 committed on May 10

Commit

00c59ff

verified ·

1 Parent(s): 621b6ab

Create app.py

Browse files

Files changed (1) hide show

app.py +467 -0

app.py ADDED Viewed

	@@ -0,0 +1,467 @@

+import os
+import torch
+import torchaudio
+import psutil
+import time
+import sys
+import numpy as np
+import gc
+import gradio as gr
+from pydub import AudioSegment
+from audiocraft.models import MusicGen
+from torch.cuda.amp import autocast
+# Set PYTORCH_CUDA_ALLOC_CONF to manage memory fragmentation.
+# NOTE(review): this assignment runs after `import torch`; presumably the CUDA
+# allocator only reads the variable on first CUDA use, so it still applies — confirm.
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:32"
+# Setup instructions:
+# 1. Create a virtual environment: python3 -m venv venv
+# 2. Activate the environment: source venv/bin/activate
+# 3. Install dependencies:
+#    pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu121
+#    pip install -r requirements.txt
+# 4. Install ffmpeg for MP3 export:
+#    sudo apt-get install ffmpeg
+# 5. Ensure model weights are in /home/ubuntu/ghostai_music_generator/models/musicgen-medium
+# 6. Log in to Hugging Face CLI: huggingface-cli login
+# 7. Request access to model if needed: https://huggingface.co/facebook/musicgen-medium
+# Check critical dependencies.
+# NumPy must match the pinned version exactly; any other version aborts start-up
+# with exit status 1.
+if np.__version__ != "1.23.5":
+    print(f"ERROR: NumPy version {np.__version__} is not compatible. Please install numpy==1.23.5.")
+    sys.exit(1)
+# A PyTorch version outside the two expected prefixes only prints a warning;
+# start-up continues.
+if not torch.__version__.startswith(("2.1.0", "2.3.1")):
+    print(f"WARNING: PyTorch version {torch.__version__} may not be compatible. Expected torch==2.1.0 or 2.3.1.")
+# 1) DEVICE SETUP
+# A CUDA device is mandatory: when none is available the script prints an error
+# and exits with status 1 instead of falling back to the CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+if device != "cuda":
+    print("ERROR: CUDA is required for GPU rendering. CPU rendering is disabled to avoid slow performance.")
+    sys.exit(1)
+# Only reached when CUDA is available; reports the name of device index 0.
+print(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
+# 2) LOAD MUSICGEN INTO VRAM
+# Loads the model once at import time into the module-level `musicgen_model`,
+# which generate_music() reads on every request. Any failure ends the process
+# with exit status 1.
+try:
+    print("Loading MusicGen model into VRAM...")
+    local_model_path = "/home/ubuntu/ghostai_music_generator/models/musicgen-medium"
+    if not os.path.exists(local_model_path):
+        print(f"ERROR: Local model path {local_model_path} does not exist. Please ensure the model weights are downloaded.")
+        # sys.exit raises SystemExit, which is not an Exception subclass, so the
+        # handler below does not intercept it and no second error is printed.
+        sys.exit(1)
+    musicgen_model = MusicGen.get_pretrained(local_model_path, device=device)
+except Exception as e:
+    print(f"ERROR: Failed to load MusicGen model: {e}")
+    print("Please ensure the model weights are in the correct path and dependencies are installed.")
+    sys.exit(1)
+# 3) RESOURCE MONITORING FUNCTION
+def print_resource_usage(stage: str):
+    print(f"--- {stage} ---")
+    print(f"GPU Memory Allocated: {torch.cuda.memory_allocated() / (1024**3):.2f} GB")
+    print(f"GPU Memory Reserved: {torch.cuda.memory_reserved() / (1024**3):.2f} GB")
+    print("---------------")
+# 4) GENRE PROMPT FUNCTIONS (Updated for distinct song sections with balanced energy)
+def set_rock_prompt():
+    return "Hard rock with a dynamic intro, expressive verse, and powerful chorus, featuring electric guitars, steady heavy drums, and deep bass"
+def set_techno_prompt():
+    return "Techno with a pulsing intro, intense build-up, and energetic drop, featuring dark synths, driving bass, and rhythmic fast drums"
+def set_jazz_prompt():
+    return "Smooth jazz with a warm intro, expressive theme, and lively outro, featuring saxophone, piano, and soft rhythmic drums"
+def set_classical_prompt():
+    return "Classical orchestral piece with a gentle intro, dramatic development, and grand finale, featuring strings, piano, and subtle rhythmic percussion"
+def set_hiphop_prompt():
+    return "Hip-hop with a groovy intro, rhythmic verse, and catchy hook, featuring deep bass, tight crisp drums, and funky synths"
+# 5) AUDIO PROCESSING FUNCTIONS
+def apply_chorus(segment):
+    # Enhanced chorus effect for richer sound
+    delayed = segment - 4  # Slightly louder delayed copy at -4 dB (was -6 dB)
+    delayed = delayed.set_frame_rate(segment.frame_rate)
+    return segment.overlay(delayed, position=20)  # Increased delay to 20ms for more noticeable effect
+def apply_eq(segment):
+    # Apply EQ: Balanced filters for cleaner sound
+    segment = segment.low_pass_filter(6000)  # Loosen low-pass filter for brighter highs
+    segment = segment.high_pass_filter(100)  # Slightly lower cutoff for low rumble
+    return segment
+def apply_limiter(segment, max_db=-8.0):
+    # Apply a strict limiter to control peaks
+    if segment.dBFS > max_db:
+        segment = segment - (segment.dBFS - max_db)  # Reduce gain to prevent clipping
+    return segment
+# 6) GENERATION & I/O FUNCTIONS
+def generate_music(instrumental_prompt: str, cfg_scale: float, top_k: int, top_p: float, temperature: float, total_duration: int, crossfade_duration: int):
+    global musicgen_model  # Use global to modify the model instance
+    if not instrumental_prompt.strip():
+        return None, "⚠️ Please enter a valid instrumental prompt!"
+    try:
+        # Record start time for total duration calculation
+        start_time = time.time()
+        # Ensure total duration is within reasonable bounds
+        total_duration = min(max(total_duration, 10), 60)  # Between 10 and 60 seconds
+        chunk_duration = 15  # Duration of each chunk (e.g., 2 chunks of 15 seconds for a 30-second track)
+        num_chunks = max(2, (total_duration + chunk_duration - 1) // chunk_duration)  # Ensure at least 2 chunks
+        chunk_duration = total_duration / num_chunks  # Adjust chunk duration to fit total duration
+        # Generate slightly longer chunks for overlap
+        overlap_duration = min(1.0, crossfade_duration / 1000.0)  # Convert ms to seconds, max 1 second
+        generation_duration = chunk_duration + overlap_duration  # Add overlap to each chunk
+        # Initialize list to store audio chunks
+        audio_chunks = []
+        sample_rate = musicgen_model.sample_rate
+        # Generate audio in chunks with distinct prompts for song structure
+        for i in range(num_chunks):
+            # Vary the prompt for each chunk to create clear song sections
+            if i == 0:
+                chunk_prompt = instrumental_prompt + ", focusing on a dynamic intro and expressive verse"
+            else:
+                chunk_prompt = instrumental_prompt + ", focusing on a powerful chorus and energetic outro"
+            print(f"Generating chunk {i+1}/{num_chunks} on GPU (prompt: {chunk_prompt})...")
+            musicgen_model.set_generation_params(
+                duration=generation_duration,
+                use_sampling=True,
+                top_k=top_k,
+                top_p=top_p,
+                temperature=temperature,
+                cfg_coef=cfg_scale
+            )
+            # Print resource usage before each chunk
+            print_resource_usage(f"Before Chunk {i+1} Generation")
+            with torch.no_grad():
+                with autocast():  # Use mixed precision for lower VRAM usage
+                    audio_chunk = musicgen_model.generate([chunk_prompt], progress=True)[0]  # Shape: (2, samples)
+            # Normalize to stereo
+            audio_chunk = audio_chunk.cpu().to(dtype=torch.float32)
+            if audio_chunk.dim() == 1:
+                audio_chunk = torch.stack([audio_chunk, audio_chunk], dim=0)  # Mono to stereo
+            elif audio_chunk.dim() == 2 and audio_chunk.shape[0] == 1:
+                audio_chunk = torch.cat([audio_chunk, audio_chunk], dim=0)
+            elif audio_chunk.dim() == 2 and audio_chunk.shape[0] != 2:
+                audio_chunk = audio_chunk[:1, :]
+                audio_chunk = torch.cat([audio_chunk, audio_chunk], dim=0)
+            elif audio_chunk.dim() > 2:
+                audio_chunk = audio_chunk.view(2, -1)
+            if audio_chunk.shape[0] != 2:
+                raise ValueError(f"Expected stereo audio with shape (2, samples), got shape {audio_chunk.shape}")
+            # Save chunk temporarily as WAV, then convert to MP3
+            temp_wav_path = f"temp_chunk_{i}.wav"
+            chunk_path = f"chunk_{i}.mp3"
+            torchaudio.save(temp_wav_path, audio_chunk, sample_rate, bits_per_sample=24)
+            segment = AudioSegment.from_wav(temp_wav_path)
+            segment = apply_limiter(segment, max_db=-8.0)  # Apply limiter early to control peaks
+            segment.export(chunk_path, format="mp3", bitrate="320k")
+            os.remove(temp_wav_path)  # Clean up temporary WAV file
+            audio_chunks.append(chunk_path)
+            # Free memory after generating each chunk
+            torch.cuda.empty_cache()
+            gc.collect()
+            time.sleep(0.5)  # Small delay to ensure memory is released
+            print_resource_usage(f"After Chunk {i+1} Generation")
+        # Combine chunks using pydub with crossfade to smooth transitions
+        print("Combining audio chunks...")
+        final_segment = AudioSegment.from_mp3(audio_chunks[0])
+        for i in range(1, len(audio_chunks)):
+            next_segment = AudioSegment.from_mp3(audio_chunks[i])
+            # Apply a gentle gain boost to raise quieter sections
+            next_segment = next_segment + 2  # Boost by 2 dB to avoid amplitude dips
+            next_segment = apply_limiter(next_segment, max_db=-8.0)  # Re-apply limiter after gain boost
+            final_segment = final_segment.append(next_segment, crossfade=crossfade_duration)
+        # Trim to exact total duration (in milliseconds)
+        final_segment = final_segment[:total_duration * 1000]
+        # Post-process to enhance audio quality
+        print("Post-processing final track...")
+        final_segment = apply_eq(final_segment)
+        final_segment = apply_chorus(final_segment)
+        # Apply final limiter and normalization with large headroom
+        final_segment = apply_limiter(final_segment, max_db=-8.0)
+        final_segment = final_segment.normalize(headroom=-10.0)  # Large headroom to prevent clipping
+        # Export as MP3 only
+        mp3_path = "output_cleaned.mp3"
+        final_segment.export(
+            mp3_path,
+            format="mp3",
+            bitrate="320k",
+            tags={"title": "GhostAI Instrumental", "artist": "GhostAI"}
+        )
+        print(f"Saved final audio to {mp3_path}")
+        # Clean up temporary chunk files
+        for chunk_path in audio_chunks:
+            os.remove(chunk_path)
+        # Print resource usage after generation
+        print_resource_usage("After Final Generation")
+        print(f"Total Generation Time: {time.time() - start_time:.2f} seconds")
+        return mp3_path, "✅ Done!"
+    except Exception as e:
+        return None, f"❌ Generation failed: {e}"
+    finally:
+        torch.cuda.empty_cache()
+        gc.collect()
+def clear_inputs():
+    return "", 3.0, 300, 0.95, 1.0, 30, 500
+# 7) CUSTOM CSS
+# Stylesheet handed to gr.Blocks(css=...) below. Its class selectors match the
+# elem_classes names used when the layout is built (input-container, textbox,
+# genre-buttons, genre-btn, settings-container, action-buttons, output-container);
+# #ghost-logo and .header-container match the header markup.
+css = """
+body {
+    background: linear-gradient(135deg, #0A0A0A 0%, #1C2526 100%);
+    color: #E0E0E0;
+    font-family: 'Orbitron', sans-serif;
+    margin: 0;
+    padding: 0;
+}
+.header-container {
+    text-align: center;
+    padding: 15px 20px;
+    background: rgba(0, 0, 0, 0.9);
+    border-bottom: 1px solid #00FF9F;
+    box-shadow: 0 0 10px rgba(161, 0, 255, 0.3);
+}
+#ghost-logo {
+    font-size: 60px;
+    display: block;
+    margin: 0 auto;
+    animation: glitch-ghost 1.5s infinite;
+    text-shadow: 0 0 10px #A100FF, 0 0 20px #00FF9F;
+}
+h1 {
+    color: #A100FF;
+    font-size: 28px;
+    margin: 5px 0;
+    text-shadow: 0 0 5px #A100FF, 0 0 10px #00FF9F;
+    animation: glitch-text 2s infinite;
+}
+p {
+    color: #E0E0E0;
+    font-size: 14px;
+    margin: 5px 0;
+}
+.input-container {
+    max-width: 1000px;
+    margin: 20px auto;
+    padding: 20px;
+    background: rgba(28, 37, 38, 0.8);
+    border-radius: 10px;
+    box-shadow: 0 0 15px rgba(0, 255, 159, 0.3);
+}
+.textbox {
+    background: #1A1A1A;
+    border: 1px solid #A100FF;
+    color: #E0E0E0;
+    border-radius: 5px;
+    padding: 10px;
+    margin-bottom: 20px;
+}
+.genre-buttons {
+    display: flex;
+    justify-content: center;
+    gap: 15px;
+    margin-bottom: 20px;
+}
+.genre-btn {
+    background: linear-gradient(45deg, #A100FF, #00FF9F);
+    border: none;
+    color: #0A0A0A;
+    font-weight: bold;
+    padding: 10px 20px;
+    border-radius: 5px;
+    transition: transform 0.3s ease, box-shadow 0.3s ease;
+}
+.genre-btn:hover {
+    transform: scale(1.05);
+    box-shadow: 0 0 15px #00FF9F;
+}
+.settings-container {
+    max-width: 1000px;
+    margin: 20px auto;
+    padding: 20px;
+    background: rgba(28, 37, 38, 0.8);
+    border-radius: 10px;
+    box-shadow: 0 0 15px rgba(0, 255, 159, 0.3);
+}
+.action-buttons {
+    display: flex;
+    justify-content: center;
+    gap: 20px;
+    margin-top: 20px;
+}
+button {
+    background: linear-gradient(45deg, #A100FF, #00FF9F);
+    border: none;
+    color: #0A0A0A;
+    font-weight: bold;
+    padding: 12px 24px;
+    border-radius: 5px;
+    transition: transform 0.3s ease, box-shadow 0.3s ease;
+}
+button:hover {
+    transform: scale(1.05);
+    box-shadow: 0 0 15px #00FF9F;
+}
+.output-container {
+    max-width: 1000px;
+    margin: 20px auto;
+    padding: 20px;
+    background: rgba(28, 37, 38, 0.8);
+    border-radius: 10px;
+    box-shadow: 0 0 15px rgba(0, 255, 159, 0.3);
+    text-align: center;
+}
+@keyframes glitch-ghost {
+    0% { transform: translate(0, 0); opacity: 1; }
+    20% { transform: translate(-5px, 2px); opacity: 0.8; }
+    40% { transform: translate(5px, -2px); opacity: 0.6; }
+    60% { transform: translate(-3px, 1px); opacity: 0.9; }
+    80% { transform: translate(3px, -1px); opacity: 0.7; }
+    100% { transform: translate(0, 0); opacity: 1; }
+}
+@keyframes glitch-text {
+    0% { transform: translate(0, 0); text-shadow: 0 0 10px #A100FF, 0 0 20px #00FF9F; }
+    20% { transform: translate(-2px, 1px); text-shadow: 0 0 15px #00FF9F, 0 0 25px #A100FF; }
+    40% { transform: translate(2px, -1px); text-shadow: 0 0 10px #A100FF, 0 0 30px #00FF9F; }
+    60% { transform: translate(-1px, 2px); text-shadow: 0 0 15px #00FF9F, 0 0 20px #A100FF; }
+    80% { transform: translate(1px, -2px); text-shadow: 0 0 10px #A100FF, 0 0 25px #00FF9F; }
+    100% { transform: translate(0, 0); text-shadow: 0 0 10px #A100FF, 0 0 20px #00FF9F; }
+}
+@font-face {
+    font-family: 'Orbitron';
+    src: url('https://fonts.gstatic.com/s/orbitron/v29/yMJRMIlzdpvBhQQL_Qq7dy0.woff2') format('woff2');
+}
+"""
+# 8) BUILD WITH BLOCKS
+with gr.Blocks(css=css) as demo:
+    gr.Markdown("""
+        <div class="header-container">
+            <div id="ghost-logo">👻</div>
+            <h1>GhostAI Music Generator</h1>
+            <p>Summon the Sound of the Unknown</p>
+        </div>
+    """)
+    with gr.Column(elem_classes="input-container"):
+        instrumental_prompt = gr.Textbox(
+            label="Instrumental Prompt",
+            placeholder="Click a genre button below or type your own instrumental prompt",
+            lines=4,
+            elem_classes="textbox"
+        )
+        with gr.Row(elem_classes="genre-buttons"):
+            rock_btn = gr.Button("Rock", elem_classes="genre-btn")
+            techno_btn = gr.Button("Techno", elem_classes="genre-btn")
+            jazz_btn = gr.Button("Jazz", elem_classes="genre-btn")
+            classical_btn = gr.Button("Classical", elem_classes="genre-btn")
+            hiphop_btn = gr.Button("Hip-Hop", elem_classes="genre-btn")
+    with gr.Column(elem_classes="settings-container"):
+        cfg_scale = gr.Slider(
+            label="Guidance Scale (CFG)",
+            minimum=1.0,
+            maximum=10.0,
+            value=3.0,
+            step=0.1,
+            info="Higher values make the instrumental more closely follow the prompt, but may reduce diversity."
+        )
+        top_k = gr.Slider(
+            label="Top-K Sampling",
+            minimum=10,
+            maximum=500,
+            value=300,
+            step=10,
+            info="Limits sampling to the top k most likely tokens. Higher values increase diversity."
+        )
+        top_p = gr.Slider(
+            label="Top-P Sampling (Nucleus Sampling)",
+            minimum=0.0,
+            maximum=1.0,
+            value=0.95,
+            step=0.1,
+            info="Keeps tokens with cumulative probability above p. Higher values increase diversity."
+        )
+        temperature = gr.Slider(
+            label="Temperature",
+            minimum=0.1,
+            maximum=2.0,
+            value=1.0,
+            step=0.1,
+            info="Controls randomness. Higher values make output more diverse but less predictable."
+        )
+        total_duration = gr.Slider(
+            label="Total Duration (seconds)",
+            minimum=10,
+            maximum=60,
+            value=30,
+            step=1,
+            info="Total duration of the track (10 to 60 seconds)."
+        )
+        crossfade_duration = gr.Slider(
+            label="Crossfade Duration (ms)",
+            minimum=100,
+            maximum=2000,
+            value=500,
+            step=100,
+            info="Crossfade duration between chunks for smoother transitions."
+        )
+        with gr.Row(elem_classes="action-buttons"):
+            gen_btn = gr.Button("Generate Music")
+            clr_btn = gr.Button("Clear Inputs")
+    with gr.Column(elem_classes="output-container"):
+        out_audio = gr.Audio(label="Generated Stereo Instrumental Track", type="filepath")
+        status = gr.Textbox(label="Status", interactive=False)
+    rock_btn.click(set_rock_prompt, inputs=None, outputs=[instrumental_prompt])
+    techno_btn.click(set_techno_prompt, inputs=None, outputs=[instrumental_prompt])
+    jazz_btn.click(set_jazz_prompt, inputs=None, outputs=[instrumental_prompt])
+    classical_btn.click(set_classical_prompt, inputs=None, outputs=[instrumental_prompt])
+    hiphop_btn.click(set_hiphop_prompt, inputs=None, outputs=[instrumental_prompt])
+    gen_btn.click(
+        generate_music,
+        inputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, crossfade_duration],
+        outputs=[out_audio, status]
+    )
+    clr_btn.click(
+        clear_inputs,
+        inputs=None,
+        outputs=[instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, crossfade_duration]
+    )
+# 9) TURN OFF OPENAPI/DOCS to avoid the gradio-client schema bug
+app = demo.launch(
+    server_name="0.0.0.0",
+    server_port=9999,
+    share=False,
+    inbrowser=False,
+    show_error=True  # keep this so you still see stack traces in console
+)
+# Access the underlying FastAPI and disable its docs
+try:
+    fastapi_app = demo._server.app  # Gradio v4.44+ puts FastAPI here
+    fastapi_app.docs_url = None
+    fastapi_app.redoc_url = None
+    fastapi_app.openapi_url = None
+except Exception:
+    pass