Spaces:
Build error
Build error
| # tabs/speech_stress_analysis.py | |
| import gradio as gr | |
| import librosa | |
| import librosa.display | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import tempfile | |
| import warnings | |
| # Suppress specific warnings from transformers if needed | |
| warnings.filterwarnings("ignore", category=UserWarning, module='transformers') | |
def extract_audio_features(audio_file):
    """Load an audio file and compute the features used for stress scoring.

    Args:
        audio_file: Path (or other source accepted by ``librosa.load``) of
            the recording to analyse.

    Returns:
        A 5-tuple ``(mfccs, pitches, energy, y, sr)``:

        * ``mfccs``   - 13 MFCC coefficients per frame.
        * ``pitches`` - the ``piptrack`` pitch candidates that are positive
          and whose magnitude is above the median magnitude. Boolean-mask
          indexing flattens the result to 1-D, so the values are NOT in
          frame (time) order.
        * ``energy``  - first row of the RMS energy feature.
        * ``y``       - the loaded samples.
        * ``sr``      - the sample rate (native rate, since ``sr=None``).
    """
    # sr=None keeps the file's own sample rate instead of resampling.
    signal, sample_rate = librosa.load(audio_file, sr=None)

    mfcc_matrix = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)

    pitch_grid, magnitude_grid = librosa.piptrack(y=signal, sr=sample_rate)
    # Keep only pitch candidates that are non-zero and louder than the median.
    loud_enough = magnitude_grid > np.median(magnitude_grid)
    is_voiced = pitch_grid > 0
    selected_pitches = pitch_grid[loud_enough & is_voiced]

    rms_energy = librosa.feature.rms(y=signal)[0]

    return mfcc_matrix, selected_pitches, rms_energy, signal, sample_rate
def _score_stress(var_mfccs, var_energy, var_pitches):
    """Turn the three feature variances into a stress percentage in [0, 100].

    Each variance is z-score standardised against hard-coded calibration
    constants, the z-scores are averaged, and the mean is mapped linearly
    from [-3, +3] onto [0, 100] and clipped. The pitch term is only included
    when ``var_pitches`` is strictly positive.

    Returns:
        The clipped percentage (may be NaN if an input variance is NaN).
    """
    # Debugging: Print individual variances
    print(f"Variance MFCCs (mean across coefficients): {var_mfccs}")
    print(f"Variance Energy: {var_energy}")
    print(f"Variance Pitches: {var_pitches}")

    # Z-score parameters. These should be calibrated on a representative
    # dataset; the values below are the placeholders the module ships with.
    mfccs_mean, mfccs_std = 1000, 500
    energy_mean, energy_std = 0.005, 0.005
    pitches_mean, pitches_std = 500000, 200000

    has_pitch = var_pitches > 0

    norm_var_mfccs = (var_mfccs - mfccs_mean) / mfccs_std
    norm_var_energy = (var_energy - energy_mean) / energy_std
    norm_var_pitches = (var_pitches - pitches_mean) / pitches_std if has_pitch else 0

    # Debugging: Print normalized variances
    print(f"Normalized Variance MFCCs: {norm_var_mfccs}")
    print(f"Normalized Variance Energy: {norm_var_energy}")
    print(f"Normalized Variance Pitches: {norm_var_pitches}")

    # Average only the components that are actually available.
    components = [norm_var_mfccs, norm_var_energy]
    if has_pitch:
        components.append(norm_var_pitches)
    stress_level = np.mean(components)

    # Debugging: Print stress_level before normalization
    print(f"Calculated Stress Level (before scaling): {stress_level}")

    # Maps -3 to 0% and +3 to 100%, then clamps anything outside that band.
    normalized_stress = np.clip((stress_level + 3) / 6 * 100, 0, 100)

    # Debugging: Print normalized_stress
    print(f"Normalized Stress Level: {normalized_stress}")
    return normalized_stress


def _save_feature_plot(mfccs, pitches, energy, sr):
    """Render MFCC / pitch / energy panels to a temporary PNG and return its path.

    The figure is always closed, even if drawing or saving fails, so repeated
    requests do not accumulate open matplotlib figures.
    """
    fig, axs = plt.subplots(3, 1, figsize=(10, 12))
    try:
        # MFCCs
        img_mfcc = librosa.display.specshow(mfccs, sr=sr, x_axis='time', ax=axs[0])
        axs[0].set_title('MFCCs')
        axs[0].set_ylabel('MFCC Coefficient')
        fig.colorbar(img_mfcc, ax=axs[0])

        # Pitch (x-axis is simply the index into the selected pitch values)
        axs[1].plot(pitches)
        axs[1].set_title('Pitch')
        axs[1].set_ylabel('Frequency (Hz)')

        # Energy
        axs[2].plot(energy)
        axs[2].set_title('Energy (RMS)')
        axs[2].set_ylabel('RMS Energy')
        axs[2].set_xlabel('Frames')

        # Use the figure's own methods rather than pyplot's global
        # "current figure", which another request could have changed.
        fig.tight_layout()

        # Reserve a path, release the handle, then write: saving to a file
        # that is still open fails on platforms with exclusive file locks.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file:
            plot_path = temp_file.name
        fig.savefig(plot_path)
        return plot_path
    finally:
        plt.close(fig)


def analyze_voice_stress(audio_file):
    """Estimate a stress level from a recording and plot its features.

    Args:
        audio_file: Path to the audio file; any falsy value (``None``, ``""``)
            is treated as "nothing uploaded".

    Returns:
        A ``(message, plot_path)`` tuple. On success ``message`` looks like
        ``"42.00% - Medium Stress"`` and ``plot_path`` is a temporary PNG.
        When no file is given, when the score is not a finite number, or when
        any step raises, ``message`` describes the problem and ``plot_path``
        is ``None``. Exceptions are never propagated to the caller.
    """
    if not audio_file:
        return "No audio file provided.", None

    try:
        mfccs, pitches, energy, _, sr = extract_audio_features(audio_file)

        # Calculate variances
        var_mfccs = np.var(mfccs, axis=1).mean()  # Mean variance across MFCC coefficients
        var_energy = np.var(energy)  # Variance of RMS energy
        var_pitches = np.var(pitches) if len(pitches) > 0 else 0  # Variance of pitches if present

        normalized_stress = _score_stress(var_mfccs, var_energy, var_pitches)

        # NaN survives np.clip and fails every "<" test below, which would
        # otherwise be reported as "nan% - High Stress".
        if not np.isfinite(normalized_stress):
            return "Error: could not compute a stress level from this audio.", None

        plot_path = _save_feature_plot(mfccs, pitches, energy, sr)

        # Interpretation
        if normalized_stress < 33:
            stress_interpretation = "Low"
        elif normalized_stress < 66:
            stress_interpretation = "Medium"
        else:
            stress_interpretation = "High"

        return f"{normalized_stress:.2f}% - {stress_interpretation} Stress", plot_path
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"Error: {str(e)}", None
def create_voice_stress_tab():
    """Build the voice-stress tab: audio input, Clear/Analyze buttons, outputs.

    Must be called inside an active Gradio layout context (e.g. within a
    ``gr.Blocks`` / tab), since it only creates components and wires events.
    Returns nothing.
    """
    with gr.Row():
        with gr.Column(scale=2):
            input_audio = gr.Audio(label="Input Audio", type="filepath")
            with gr.Row():
                clear_btn = gr.Button("Clear", scale=1)
                submit_btn = gr.Button("Analyze", scale=1, elem_classes="submit")
        with gr.Column(scale=1):
            output_stress = gr.Label(label="Stress Level")
            output_plot = gr.Image(label="Stress Analysis Plot")

    submit_btn.click(
        analyze_voice_stress,
        inputs=[input_audio],
        outputs=[output_stress, output_plot],
    )
    # One None per output component: the handler must return exactly as many
    # values as there are outputs, otherwise Gradio raises when Clear is clicked.
    clear_btn.click(
        lambda: (None, None, None),
        outputs=[input_audio, output_stress, output_plot],
    )
    gr.Examples(["./assets/audio/fitness.wav"], inputs=[input_audio])