Spaces:

Rahulk2197
/

rashmi_app

Sleeping

App Files Files Community

rashmi_app / feat.py

Rahulk2197

Update feat.py

5d5d275 verified over 1 year ago

raw

history blame

4.42 kB


	import librosa
	import numpy as np

	def features_extractor(file_name):
	    """Build one flat feature vector summarising an audio file.

	    The file is loaded with ``librosa.load(..., res_type='kaiser_fast')`` and
	    four librosa features are computed from it: 25 MFCCs, zero-crossing rate,
	    STFT chroma and a mel spectrogram. Each feature matrix is transposed and
	    averaged along axis 0, which collapses it to one mean value per feature
	    row.

	    Args:
	        file_name: Audio file to analyse; passed unchanged to ``librosa.load``.

	    Returns:
	        A NumPy array holding, in order, the MFCC means, the zero-crossing-rate
	        mean, the chroma means and the mel-band means.
	    """
	    def frame_mean(feature_matrix):
	        # Identical reduction for every feature: transpose, then mean of axis 0.
	        return np.mean(feature_matrix.T, axis=0)

	    signal, rate = librosa.load(file_name, res_type='kaiser_fast')

	    # The tuple order defines the layout of the returned vector.
	    per_feature_means = (
	        frame_mean(librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=25)),
	        frame_mean(librosa.feature.zero_crossing_rate(y=signal)),
	        frame_mean(librosa.feature.chroma_stft(y=signal, sr=rate)),
	        frame_mean(librosa.feature.melspectrogram(y=signal, sr=rate)),
	    )
	    return np.hstack(per_feature_means)


	#########################################################################################################################
	import speech_recognition as sr

	def recognize_speech_from_file(audio_file_path):
	    """Transcribe an audio file using the Google Web Speech API.

	    NOTE(review): a second function with this same name is defined further
	    down in this file; at import time the later definition replaces this one.

	    Args:
	        audio_file_path: Audio file handed to ``sr.AudioFile``.

	    Returns:
	        The recognised text. When the request fails or the audio cannot be
	        understood, a message is printed and ``None`` is returned.
	    """
	    engine = sr.Recognizer()

	    # Read the entire file into memory before contacting the service.
	    with sr.AudioFile(audio_file_path) as wav_source:
	        captured = engine.record(wav_source)

	    try:
	        return engine.recognize_google(captured)
	    except sr.UnknownValueError:
	        print("Could not understand the audio")
	    except sr.RequestError as err:
	        print(f"Could not request results; {err}")
	    return None

	def count_words(text):
	    """Return the number of whitespace-separated words in *text*.

	    Args:
	        text: String to count words in. ``None`` is accepted and treated as
	            empty text, because the transcription helper in this file yields
	            ``None`` when recognition fails.

	    Returns:
	        The word count as an ``int``; ``0`` for ``None``, empty or
	        whitespace-only text.
	    """
	    if text is None:
	        return 0
	    # split() with no argument splits on runs of whitespace and drops empties.
	    return len(text.split())

	def word_count(audio_path):
	    """Transcribe *audio_path* and count the words in the transcript.

	    Args:
	        audio_path: Audio file forwarded to ``recognize_speech_from_file``.

	    Returns:
	        ``[number_of_words, transcript]`` when a non-empty transcript was
	        obtained, otherwise ``None``.
	    """
	    spoken_text = recognize_speech_from_file(audio_file_path=audio_path)
	    if not spoken_text:
	        # No transcript (failure or empty result): nothing to count.
	        return None
	    return [count_words(spoken_text), spoken_text]

	########################################################################################################################
	import speech_recognition as sr
	import wave

	def recognize_speech_from_file(audio_file_path):
	    """Transcribe an audio file using the Google Web Speech API.

	    Args:
	        audio_file_path: Audio file handed to ``sr.AudioFile``.

	    Returns:
	        The transcript returned by ``recognize_google``, or ``None`` when the
	        audio could not be understood or the request to the service failed
	        (the latter also prints a message).
	    """
	    engine = sr.Recognizer()

	    # Pull the whole recording into memory before calling the service.
	    with sr.AudioFile(audio_file_path) as wav_source:
	        captured = engine.record(wav_source)

	    try:
	        return engine.recognize_google(captured)
	    except sr.RequestError as err:
	        print(f"Could not request results from Google Speech Recognition service; {err}")
	    except sr.UnknownValueError:
	        # Unintelligible audio is reported silently through the None result.
	        pass
	    return None

	def count_words(text):
	    """Return the number of whitespace-separated words in *text*.

	    Args:
	        text: String to count words in. ``None`` is accepted and treated as
	            empty text, because ``recognize_speech_from_file`` returns ``None``
	            when recognition fails.

	    Returns:
	        The word count as an ``int``; ``0`` for ``None``, empty or
	        whitespace-only text.
	    """
	    if text is None:
	        return 0
	    # split() with no argument splits on runs of whitespace and drops empties.
	    return len(text.split())

	def get_audio_duration(audio_file_path):
	    """Return the playing time of a WAV file in seconds.

	    Args:
	        audio_file_path: File name (or readable binary file object) accepted
	            by ``wave.open``.

	    Returns:
	        The frame count divided by the frame rate, as a ``float``. ``0.0`` is
	        returned when the header declares a frame rate of zero, since no
	        duration can be derived from such a file.

	    Raises:
	        wave.Error: If the file is not a WAV file the ``wave`` module can read.
	        OSError: If the file cannot be opened.
	    """
	    with wave.open(audio_file_path, 'rb') as wav_reader:
	        frame_count = wav_reader.getnframes()
	        frame_rate = wav_reader.getframerate()

	    # The wave reader does not validate the header's frame rate, so a
	    # malformed file can report 0 here; guard against dividing by it.
	    if frame_rate <= 0:
	        return 0.0
	    return frame_count / float(frame_rate)

	def word_count1(audio_path):
	    """Transcribe *audio_path* and report word count, transcript and duration.

	    Args:
	        audio_path: Audio file forwarded to ``recognize_speech_from_file`` and,
	            when a transcript is obtained, to ``get_audio_duration``.

	    Returns:
	        ``[number_of_words, transcript, duration]`` when a non-empty
	        transcript was obtained, otherwise ``[0, None, 0.0]``.
	    """
	    spoken_text = recognize_speech_from_file(audio_file_path=audio_path)
	    if not spoken_text:
	        # No transcript: return the placeholder triple without reading the file.
	        return [0, None, 0.0]

	    seconds = get_audio_duration(audio_path)
	    return [count_words(spoken_text), spoken_text, seconds]

	# word_count('angry_Akash.wav')

	# print(word_count1(r'c:\Users\hp\OneDrive\Desktop\Major Emotions\Mixed\Angry-1-3-1.wav'))
	# Example usage
	# audio_path = 'angry_Ansh.wav'
	# result = word_count(audio_path)
	# print(result)

	import librosa
	import numpy as np
	from pyAudioAnalysis import audioSegmentation as aS

	def get_speaking_rate(file_path):
	    """Estimate the speaking rate of an audio file.

	    The rate is the number of non-silent intervals found by
	    ``librosa.effects.split`` (used here as a rough stand-in for the syllable
	    count) divided by the total length of the segments returned by
	    pyAudioAnalysis' ``silence_removal``.

	    Args:
	        file_path: Audio file to analyse; loaded with ``librosa.load`` using
	            ``sr=None``.

	    Returns:
	        The estimated rate (intended as syllables per second), or ``0`` when
	        the summed segment length is not positive.
	    """
	    waveform, native_rate = librosa.load(file_path, sr=None)

	    # Segments of detected speech as (start, end) pairs.
	    voiced_segments = aS.silence_removal(
	        waveform, native_rate, 0.020, 0.020,
	        smooth_window=1.0, weight=0.3, plot=False,
	    )
	    voiced_seconds = sum(stop - begin for begin, stop in voiced_segments)

	    # Approximation: each non-silent interval (30 dB threshold) counts as one syllable.
	    burst_count = len(librosa.effects.split(waveform, top_db=30))

	    if voiced_seconds > 0:
	        return burst_count / voiced_seconds
	    return 0

	# Example usage (get_speaking_rate returns a single number, so the result is not indexed)
	# file_path = 'angry_Ansh.wav'
	# speaking_rate = get_speaking_rate(file_path)
	# print(f"Speaking Rate: {speaking_rate:.2f} syllables per second")