Spaces:
Sleeping
Sleeping
| import librosa | |
| import numpy as np | |
def features_extractor(file_name):
    """Build a single feature vector describing an audio file.

    The file is loaded with librosa and four feature sets are computed in
    order: 25 MFCCs, zero crossing rate, chroma (STFT based) and a mel
    spectrogram. Each feature matrix is averaged and the four averages are
    joined end to end.

    Args:
        file_name: Audio file passed straight to ``librosa.load``.

    Returns:
        A NumPy array holding the MFCC, zero-crossing-rate, chroma and mel
        averages, in that order.
    """
    signal, rate = librosa.load(file_name, res_type='kaiser_fast')

    def _frame_mean(matrix):
        # Presumably librosa returns (n_features, n_frames), so this gives one
        # mean per feature across all frames -- confirm against librosa docs.
        return np.mean(matrix.T, axis=0)

    mfcc_part = _frame_mean(
        librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=25)
    )
    zcr_part = _frame_mean(librosa.feature.zero_crossing_rate(y=signal))
    chroma_part = _frame_mean(librosa.feature.chroma_stft(y=signal, sr=rate))
    mel_part = _frame_mean(librosa.feature.melspectrogram(y=signal, sr=rate))

    # Keep this ordering stable: anything consuming the vector depends on it.
    return np.hstack((mfcc_part, zcr_part, chroma_part, mel_part))
| ######################################################################################################################### | |
| import speech_recognition as sr | |
def recognize_speech_from_file(audio_file_path):
    """Transcribe an audio file with the Google Web Speech API.

    Args:
        audio_file_path: Path of the audio file handed to ``sr.AudioFile``.

    Returns:
        The recognized text. When the request fails or the audio cannot be
        understood, a message is printed and ``None`` is returned.
    """
    engine = sr.Recognizer()

    # Capture the complete recording before contacting the service.
    with sr.AudioFile(audio_file_path) as audio_source:
        recording = engine.record(audio_source)

    try:
        return engine.recognize_google(recording)
    except sr.RequestError as err:
        print(f"Could not request results; {err}")
    except sr.UnknownValueError:
        print("Could not understand the audio")
    return None
def count_words(text):
    """Return the number of whitespace-separated words in *text*.

    NOTE: a second ``count_words`` is defined further down in this file and
    replaces this one once the module has finished loading.

    Args:
        text: The string to count words in. ``None`` is accepted and counted
            as zero words, because the speech recognizer in this file yields
            ``None`` when it cannot produce a transcript.

    Returns:
        The word count as an ``int``; ``0`` for ``None`` or blank text.
    """
    if text is None:
        return 0
    # split() with no argument collapses runs of any whitespace.
    return len(text.split())
def word_count(audio_path):
    """Transcribe an audio file and count the words in the transcript.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        ``[word_count, transcript]`` when a non-empty transcript was
        obtained, otherwise ``None``.
    """
    text = recognize_speech_from_file(audio_file_path=audio_path)
    if not text:
        # No transcript (failed or empty): callers get None, not a list.
        return None
    return [count_words(text), text]
| ######################################################################################################################## | |
| import speech_recognition as sr | |
| import wave | |
def recognize_speech_from_file(audio_file_path):
    """Return the Google Speech Recognition transcript of an audio file.

    Args:
        audio_file_path: Path of the audio file handed to ``sr.AudioFile``.

    Returns:
        The transcript string, or ``None`` when the speech could not be
        understood or the service request failed. Only a failed request
        prints a message.
    """
    engine = sr.Recognizer()
    with sr.AudioFile(audio_file_path) as stream:
        captured = engine.record(stream)

    try:
        text = engine.recognize_google(captured)
    except sr.UnknownValueError:
        # Unintelligible audio is reported silently.
        return None
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service; {e}")
        return None
    else:
        return text
def count_words(text):
    """Return the number of whitespace-separated words in *text*.

    Args:
        text: The string to count words in. ``None`` is accepted and counted
            as zero words, because ``recognize_speech_from_file`` in this
            file returns ``None`` when it cannot produce a transcript.

    Returns:
        The word count as an ``int``; ``0`` for ``None`` or blank text.
    """
    if text is None:
        return 0
    # split() with no argument collapses runs of any whitespace.
    return len(text.split())
def get_audio_duration(audio_file_path):
    """Return the length of a WAV file in seconds.

    Args:
        audio_file_path: File to open with the standard ``wave`` module.

    Returns:
        The frame count divided by the frame rate, as a ``float``.
    """
    with wave.open(audio_file_path, 'r') as wav:
        total_frames, frame_rate = wav.getnframes(), wav.getframerate()
    # float() keeps the division fractional whatever the header values are.
    return total_frames / float(frame_rate)
def word_count1(audio_path):
    """Transcribe an audio file and report word count, text and duration.

    Args:
        audio_path: Path of the audio file to transcribe and measure.

    Returns:
        ``[word_count, transcript, duration_seconds]`` when a non-empty
        transcript was obtained, otherwise ``[0, None, 0.0]``.
    """
    text = recognize_speech_from_file(audio_file_path=audio_path)
    if not text:
        # Same three-slot shape as the success case so callers can index it.
        return [0, None, 0.0]
    seconds = get_audio_duration(audio_path)
    return [count_words(text), text, seconds]
| # word_count('angry_Akash.wav') | |
| # print(word_count1(r'c:\Users\hp\OneDrive\Desktop\Major Emotions\Mixed\Angry-1-3-1.wav')) | |
| # Example usage | |
| # audio_path = 'angry_Ansh.wav' | |
| # result = word_count(audio_path) | |
| # print(result) | |
| import librosa | |
| import numpy as np | |
| from pyAudioAnalysis import audioSegmentation as aS | |
def get_speaking_rate(file_path):
    """Estimate the speaking rate of an audio file.

    The rate is a rough syllable count divided by the total length of the
    speech segments reported by pyAudioAnalysis' silence removal.

    Args:
        file_path: Audio file passed to ``librosa.load`` (loaded with
            ``sr=None``).

    Returns:
        The estimated syllables per second, or ``0`` when the total speech
        duration is not positive.
    """
    signal, sample_rate = librosa.load(file_path, sr=None)

    # Segments are unpacked as (start, end) pairs; presumably in seconds --
    # confirm against the pyAudioAnalysis documentation.
    speech_segments = aS.silence_removal(
        signal, sample_rate, 0.020, 0.020,
        smooth_window=1.0, weight=0.3, plot=False,
    )
    voiced_time = 0
    for seg_start, seg_end in speech_segments:
        voiced_time += seg_end - seg_start

    # Approximation: each interval returned by librosa.effects.split is
    # counted as one syllable.
    syllable_estimate = len(librosa.effects.split(signal, top_db=30))

    if voiced_time > 0:
        return syllable_estimate / voiced_time
    return 0
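# Example usage (get_speaking_rate returns a single number, so the result is not indexed)
# file_path = 'angry_Ansh.wav'
# speaking_rate = get_speaking_rate(file_path)
# print(f"Speaking Rate: {speaking_rate:.2f} syllables per second")
# NOTE: the two calls below index into the result and fail, because get_speaking_rate returns one number.
# print(get_speaking_rate(file_path)[1])
# print(get_speaking_rate(file_path)[2])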