Spaces:

Phonepadith
/

whisper-3-large-lao-fine-tuned

Sleeping

App Files Files Community

Phonepadith committed on Oct 14, 2025

Commit

489c533

verified ·

1 Parent(s): 15348a7

Create app.py

Browse files

Files changed (1) hide show

app.py +110 -0

app.py ADDED Viewed

	@@ -0,0 +1,110 @@

+# app.py - Gradio interface for Whisper Lao ASR
+import gradio as gr
+import torch
+from transformers import WhisperProcessor, WhisperForConditionalGeneration
+import numpy as np
# Pick the compute device up front: CUDA when torch reports one, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Fetch the fine-tuned checkpoint; the processor and the model share one repo id.
model_id = "Phonepadith/whisper-3-large-lao-finetuned-v1"
processor = WhisperProcessor.from_pretrained(model_id)
# Load the weights and place them on the selected device in one step.
model = WhisperForConditionalGeneration.from_pretrained(model_id).to(device)
# Sampling rate (Hz) that the audio is resampled to before feature extraction.
_TARGET_SAMPLE_RATE = 16000

# Message shown when there is nothing to transcribe.
_NO_AUDIO_MESSAGE = "Please upload or record audio."


def _to_mono_float32(audio_array):
    """Return *audio_array* as a 1-D float32 waveform scaled into [-1.0, 1.0].

    Args:
        audio_array: non-empty NumPy array of samples, either ``(samples,)``
            or ``(samples, channels)``; integer PCM or floating point.

    Returns:
        A new 1-D ``float32`` array; the input is not modified.
    """
    # Remember the dtype BEFORE converting: np.iinfo() only accepts integer
    # dtypes and raises ValueError when asked about float32.
    source_dtype = audio_array.dtype
    samples = audio_array.astype(np.float32)  # astype copies, so in-place ops below are safe

    # Downmix multi-channel audio by averaging the channel axis.
    # NOTE(review): assumes the (samples, channels) layout Gradio uses for
    # stereo input -- confirm if other callers are added.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    if np.issubdtype(source_dtype, np.integer):
        # Integer PCM: divide by the full-scale value of the original dtype.
        samples /= np.float32(np.iinfo(source_dtype).max)
    else:
        # Float input is normally already in [-1, 1]; only rescale when it is not.
        peak = float(np.abs(samples).max())
        if peak > 1.0:
            samples /= np.float32(peak)
    return samples


def transcribe_audio(audio):
    """
    Transcribe audio to Lao text.

    Args:
        audio: tuple (sample_rate, audio_array) from Gradio, or None when
            nothing was recorded or uploaded.
    Returns:
        transcription: Lao text, or a prompt asking for audio when the input
        is None or contains no samples.
    """
    if audio is None:
        return _NO_AUDIO_MESSAGE

    sample_rate, audio_array = audio
    audio_array = np.asarray(audio_array)
    # A zero-length recording has no peak to inspect and nothing to decode.
    if audio_array.size == 0:
        return _NO_AUDIO_MESSAGE

    waveform = _to_mono_float32(audio_array)

    # Resample to 16 kHz if needed.
    if sample_rate != _TARGET_SAMPLE_RATE:
        import librosa  # imported lazily: only needed when resampling

        waveform = librosa.resample(
            waveform,
            orig_sr=sample_rate,
            target_sr=_TARGET_SAMPLE_RATE,
        )

    # Turn the waveform into model input features on the model's device.
    input_features = processor(
        waveform,
        sampling_rate=_TARGET_SAMPLE_RATE,
        return_tensors="pt",
    ).input_features.to(device)

    # Inference only: gradients are not needed.
    with torch.no_grad():
        predicted_ids = model.generate(input_features)

    # Decode the single generated sequence, dropping special tokens.
    return processor.batch_decode(
        predicted_ids,
        skip_special_tokens=True,
    )[0]
# Build the Gradio UI piece by piece, then wire it to transcribe_audio.

# Input widget: microphone recording or file upload, delivered as a NumPy tuple.
_audio_input = gr.Audio(
    label="Record or Upload Lao Audio",
    type="numpy",
    sources=["microphone", "upload"],
)

# Output widget: multi-line text box for the transcription.
_transcript_box = gr.Textbox(
    lines=5,
    placeholder="Your transcription will appear here...",
    label="Transcription (ພາສາລາວ)",
)

# Markdown shown above the widgets.
_description_md = """
    ### Automatic Speech Recognition for Lao Language
    This model transcribes Lao (ພາສາລາວ) speech to text using a fine-tuned Whisper Large model.
    **How to use:**
    1. Click the microphone icon to record audio, or upload an audio file
    2. Wait for the transcription to appear
    **Supported formats:** WAV, MP3, OGG, FLAC (16kHz recommended)
    """

# Markdown shown below the widgets.
_article_md = """
    ### About this model
    - **Model:** Fine-tuned Whisper Large for Lao
    - **Dataset:** 7k+ Lao speech samples
    - **Repository:** [Phonepadith/whisper-3-large-lao-finetuned-v1](https://huggingface.co/Phonepadith/whisper-3-large-lao-finetuned-v1)
    ---
    Created by [@Phonepadith](https://huggingface.co/Phonepadith) | 📧 [email protected]
    """

# No bundled examples yet; add entries such as ["example1.wav"] here.
_example_inputs = []

demo = gr.Interface(
    fn=transcribe_audio,
    inputs=_audio_input,
    outputs=_transcript_box,
    title="🗣️ Whisper Large Lao ASR",
    description=_description_md,
    article=_article_md,
    examples=_example_inputs,
    cache_examples=False,
    theme=gr.themes.Soft(),
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()