vitorcalvi committed
Commit 74ee391 · 1 Parent(s): c8fe47c

Add application file

Files changed (1)
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
+ import gradio as gr
+ from transformers import pipeline
+ import librosa
+ import numpy as np
+ import matplotlib
+ matplotlib.use("Agg")  # headless backend for server environments
+ import matplotlib.pyplot as plt
+ import torch
+ import spaces
+
+ # Check for GPU availability
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # Load Whisper model using the transformers ASR pipeline
+ transcriber = pipeline(
+     "automatic-speech-recognition",
+     model="openai/whisper-base.en",
+     device=0 if device == "cuda" else -1,
+ )
+
+ @spaces.GPU
+ def analyze_audio(audio):
+     # Convert audio to text using Whisper
+     transcription_result = transcriber(audio)
+     transcription = transcription_result["text"]
+
+     # Load the audio file at its native sampling rate
+     y, sr = librosa.load(audio, sr=None)
+
+     # Extract prosodic features: fundamental frequency (YIN) and tempo.
+     # sr must be passed to yin because the audio was loaded at its native rate.
+     pitch = librosa.yin(y, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'), sr=sr)
+     tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
+     tempo = float(np.atleast_1d(tempo)[0])  # beat_track may return a 1-element array
+
+     # Pitch variance as a simple measure of intonation range
+     pitch_variance = float(np.var(pitch))
+
+     # Estimate speaking pace; word count serves as a rough proxy for syllables
+     num_words = len(transcription.split())
+     duration = librosa.get_duration(y=y, sr=sr)
+     pace = num_words / duration if duration > 0 else 0.0
+
+     # Plot the pitch contour
+     plt.figure(figsize=(10, 4))
+     plt.plot(pitch, label='Pitch')
+     plt.xlabel('Frame')
+     plt.ylabel('Frequency (Hz)')
+     plt.title('Pitch Over Time')
+     plt.legend()
+     pitch_plot_path = '/tmp/pitch_contour.png'
+     plt.savefig(pitch_plot_path)
+     plt.close()
+
+     # Voice stress analysis (simplified): pitch standard deviation as a stress proxy
+     stress_level = float(np.std(pitch))
+
+     return transcription, tempo, pace, pitch_variance, stress_level, pitch_plot_path
+
+ # Create Gradio interface
+ input_audio = gr.Audio(label="Input Audio", type="filepath")
+
+ iface = gr.Interface(
+     fn=analyze_audio,
+     inputs=input_audio,
+     outputs=[
+         gr.Textbox(label="Transcription"),
+         gr.Number(label="Tempo (BPM)"),
+         gr.Number(label="Speaking Pace (words/sec)"),
+         gr.Number(label="Pitch Variance"),
+         gr.Number(label="Stress Level (pitch std, Hz)"),
+         gr.Image(label="Pitch Contour Plot"),
+     ],
+     live=True,
+ )
+
+ if __name__ == "__main__":
+     iface.launch(share=False)
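
Note that the commit adds only app.py; the Space presumably declares gradio, transformers, torch, librosa, matplotlib, and spaces as dependencies elsewhere (e.g., a requirements.txt not shown here). For a quick check outside the Space, the handler can be called directly. This is a minimal sketch, not part of the commit: "sample.wav" is a hypothetical local file, and it assumes the @spaces.GPU decorator is a no-op when running off-Space.

# local_smoke_test.py -- sketch only; "sample.wav" is a hypothetical file
from app import analyze_audio

text, tempo, pace, pitch_var, stress, plot_path = analyze_audio("sample.wav")
print(f"Transcription: {text}")
print(f"Tempo: {tempo:.1f} BPM | Pace: {pace:.2f} words/sec")
print(f"Pitch variance: {pitch_var:.1f} | Stress (pitch std): {stress:.1f} Hz")
print(f"Pitch contour saved to {plot_path}")

Importing app builds the pipeline and the interface but, thanks to the __main__ guard, does not start the Gradio server.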