Spaces:

vitorcalvi
/

mmesa-gitex

Build error

App Files Files Community

mmesa-gitex / tabs /speech_stress_analysis.py

vitorcalvi

12 Oct Gitex 2024

b20a621 about 1 year ago

raw

history blame

4.78 kB

	# tabs/speech_stress_analysis.py

	import gradio as gr
	import librosa
	import librosa.display
	import numpy as np
	import matplotlib.pyplot as plt
	import tempfile
	import warnings

	# Hide UserWarning messages attributed to modules whose name starts with
	# "transformers" (no import of that package is visible in this module, so
	# this presumably targets a dependency loaded elsewhere in the app).
	warnings.filterwarnings(
	    action="ignore",
	    category=UserWarning,
	    module="transformers",
	)

	def extract_audio_features(audio_file):
	    """Load an audio file and compute the features used for stress scoring.

	    Args:
	        audio_file: Audio source handed straight to ``librosa.load``.

	    Returns:
	        A 5-tuple ``(mfccs, pitches, energy, y, sr)``:
	        the 13-coefficient MFCC matrix, a 1-D array of selected pitch
	        values, the first row of the RMS energy, the loaded waveform and
	        its sampling rate.
	    """
	    # sr=None asks librosa not to resample the recording.
	    waveform, sample_rate = librosa.load(audio_file, sr=None)

	    mfcc_matrix = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=13)

	    pitch_grid, magnitude_grid = librosa.piptrack(y=waveform, sr=sample_rate)
	    # Keep only bins that are louder than the median magnitude and carry a
	    # non-zero pitch estimate. Boolean-mask indexing flattens the grid, so
	    # the result is a 1-D collection of values, not a per-frame track.
	    above_median = magnitude_grid > np.median(magnitude_grid)
	    has_pitch = pitch_grid > 0
	    selected_pitches = pitch_grid[above_median & has_pitch]

	    rms_energy = librosa.feature.rms(y=waveform)[0]

	    return mfcc_matrix, selected_pitches, rms_energy, waveform, sample_rate

	def _stress_percentage(mfccs, pitches, energy):
	    """Combine the variability of the three features into a 0-100 score.

	    Raises:
	        ValueError: If the combined score is NaN or infinite (for example
	            when a feature array is empty), so that it is reported as an
	            error instead of being labelled as a stress level.
	    """
	    # Calculate variances
	    var_mfccs = np.var(mfccs, axis=1).mean()  # Mean variance across MFCC coefficients
	    var_energy = np.var(energy)  # Variance of RMS energy
	    var_pitches = np.var(pitches) if len(pitches) > 0 else 0  # Variance of pitches if present

	    # Debugging: Print individual variances
	    print(f"Variance MFCCs (mean across coefficients): {var_mfccs}")
	    print(f"Variance Energy: {var_energy}")
	    print(f"Variance Pitches: {var_pitches}")

	    # Normalize each variance using Z-Score Standardization
	    # These parameters should be calibrated based on a representative dataset
	    mfccs_mean = 1000
	    mfccs_std = 500
	    energy_mean = 0.005
	    energy_std = 0.005
	    pitches_mean = 500000
	    pitches_std = 200000

	    # A zero pitch variance is treated as "no usable pitch information";
	    # in that case pitch is left out of the average entirely.
	    use_pitch = var_pitches > 0

	    norm_var_mfccs = (var_mfccs - mfccs_mean) / mfccs_std
	    norm_var_energy = (var_energy - energy_mean) / energy_std
	    norm_var_pitches = (var_pitches - pitches_mean) / pitches_std if use_pitch else 0

	    # Debugging: Print normalized variances
	    print(f"Normalized Variance MFCCs: {norm_var_mfccs}")
	    print(f"Normalized Variance Energy: {norm_var_energy}")
	    print(f"Normalized Variance Pitches: {norm_var_pitches}")

	    # Combine normalized variances
	    components = [norm_var_mfccs, norm_var_energy]
	    if use_pitch:
	        components.append(norm_var_pitches)
	    stress_level = np.mean(components)

	    # Debugging: Print stress_level before normalization
	    print(f"Calculated Stress Level (before scaling): {stress_level}")

	    # NaN compares False against every threshold and would otherwise fall
	    # through to the "High" label; surface it as an error instead.
	    if not np.isfinite(stress_level):
	        raise ValueError(
	            "could not derive a finite stress level from the extracted audio features"
	        )

	    # Scale to 0-100%: maps -3 to 0%, +3 to 100%, then clamp.
	    normalized_stress = np.clip((stress_level + 3) / 6 * 100, 0, 100)

	    # Debugging: Print normalized_stress
	    print(f"Normalized Stress Level: {normalized_stress}")

	    return normalized_stress


	def _save_feature_plot(mfccs, pitches, energy, sr):
	    """Draw the MFCC, pitch and energy panels and return the PNG file path."""
	    fig, axs = plt.subplots(3, 1, figsize=(10, 12))
	    try:
	        # MFCCs
	        img_mfcc = librosa.display.specshow(mfccs, sr=sr, x_axis='time', ax=axs[0])
	        axs[0].set_title('MFCCs')
	        axs[0].set_ylabel('MFCC Coefficient')
	        fig.colorbar(img_mfcc, ax=axs[0])

	        # Pitch
	        axs[1].plot(pitches)
	        axs[1].set_title('Pitch')
	        axs[1].set_ylabel('Frequency (Hz)')

	        # Energy
	        axs[2].plot(energy)
	        axs[2].set_title('Energy (RMS)')
	        axs[2].set_ylabel('RMS Energy')
	        axs[2].set_xlabel('Frames')

	        # Operate on this figure object rather than pyplot's global
	        # "current figure", which may belong to another request.
	        fig.tight_layout()

	        # delete=False: the file must outlive this function so the caller
	        # can hand the path to the UI. Release our handle before writing.
	        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file:
	            plot_path = temp_file.name
	        fig.savefig(plot_path)
	        return plot_path
	    finally:
	        # Always release the figure, including when plotting/saving fails;
	        # pyplot keeps figures alive until they are explicitly closed.
	        plt.close(fig)


	def _interpret_stress(normalized_stress):
	    """Map a 0-100 stress percentage to "Low", "Medium" or "High"."""
	    if normalized_stress < 33:
	        return "Low"
	    if normalized_stress < 66:
	        return "Medium"
	    return "High"


	def analyze_voice_stress(audio_file):
	    """Estimate a stress percentage for a recording and plot its features.

	    Args:
	        audio_file: Audio source passed to ``extract_audio_features``; a
	            falsy value (``None``, empty string) means nothing was provided.

	    Returns:
	        A ``(message, plot_path)`` tuple. On success ``message`` looks like
	        ``"42.00% - Medium Stress"`` and ``plot_path`` is the path of a PNG
	        written to a temporary file. When no file is given or any step
	        fails, ``plot_path`` is ``None`` and ``message`` explains why.
	    """
	    if not audio_file:
	        return "No audio file provided.", None

	    # This is the UI boundary: any failure is reported as text rather than
	    # propagated, so the broad except is deliberate.
	    try:
	        mfccs, pitches, energy, _, sr = extract_audio_features(audio_file)
	        normalized_stress = _stress_percentage(mfccs, pitches, energy)
	        plot_path = _save_feature_plot(mfccs, pitches, energy, sr)
	        stress_interpretation = _interpret_stress(normalized_stress)
	        return f"{normalized_stress:.2f}% - {stress_interpretation} Stress", plot_path
	    except Exception as e:
	        return f"Error: {str(e)}", None

	def create_voice_stress_tab():
	    """Build the voice-stress tab: audio input, action buttons and outputs.

	    Creates the Gradio components, wires the "Analyze" button to
	    ``analyze_voice_stress`` and the "Clear" button to a reset of all three
	    components, and registers one example recording. Returns nothing.

	    NOTE(review): presumably called inside an active ``gr.Blocks()`` context
	    owned by the caller; confirm against the app entry point.
	    """
	    # NOTE(review): the nesting of the layout contexts below follows the
	    # conventional two-column arrangement; verify against the deployed UI.
	    with gr.Row():
	        with gr.Column(scale=2):
	            input_audio = gr.Audio(label="Input Audio", type="filepath")
	            with gr.Row():
	                clear_btn = gr.Button("Clear", scale=1)
	                submit_btn = gr.Button("Analyze", scale=1, elem_classes="submit")
	        with gr.Column(scale=1):
	            output_stress = gr.Label(label="Stress Level")
	            output_plot = gr.Image(label="Stress Analysis Plot")

	    submit_btn.click(analyze_voice_stress, inputs=[input_audio], outputs=[output_stress, output_plot])
	    # The callback must return one value per output component: three
	    # components are reset, so three Nones are returned.
	    clear_btn.click(lambda: (None, None, None), outputs=[input_audio, output_stress, output_plot])

	    gr.Examples(["./assets/audio/fitness.wav"], inputs=[input_audio])