deepfakeaudio / app.py

Update app.py

55cf66a verified 10 months ago

4.8 kB

	import numpy as np
	import librosa
	import tensorflow as tf
	import streamlit as st
	import sounddevice as sd
	import wave
	import os

	# Constants
	# The two framing values are expressed in seconds; extract_breath_features
	# converts them to sample counts by multiplying with the clip's sample rate.
	window_length = 0.02 # 20 ms analysis window
	hop_length = 0.0025 # 2.5 ms hop between successive windows
	sample_rate = 22050 # Standard audio sample rate (not referenced elsewhere in the visible code)
	n_mels = 128 # Number of mel filter banks used for the mel spectrogram
	threshold_zcr = 0.1 # A frame counts as breath only if its zero-crossing rate exceeds this
	threshold_rmse = 0.1 # A frame counts as breath only if its RMS energy also exceeds this
	max_len = 500 # Every feature is padded or truncated to this many frames

	# Build the TFLite interpreter for the bundled model and allocate its tensors
	# before any inference call is made.
	interpreter = tf.lite.Interpreter(
	    model_path="model_breath_logspec_mfcc_cnn.tflite"
	)
	interpreter.allocate_tensors()

	# Keep the input/output tensor descriptions around; predict_audio looks up
	# the tensor indices from them.
	input_details, output_details = (
	    interpreter.get_input_details(),
	    interpreter.get_output_details(),
	)

	# Function to extract breath features
	def extract_breath_features(y, sr):
	    """Mark the frames of a signal that look like breath noise.

	    A frame is flagged with 1 when its zero-crossing rate is above
	    ``threshold_zcr`` and its RMS energy is above ``threshold_rmse``;
	    every other frame is flagged with 0.

	    Args:
	        y: Audio samples as passed on to librosa's feature functions.
	        sr: Sample rate of ``y``; used to turn ``window_length`` and
	            ``hop_length`` (seconds) into sample counts.

	    Returns:
	        A flat integer array of 0/1 flags (one per analysis frame when
	        ``y`` is mono).
	    """
	    # Both features must share the same framing so their frames line up.
	    framing = {
	        "frame_length": int(window_length * sr),
	        "hop_length": int(hop_length * sr),
	    }

	    crossing_rate = librosa.feature.zero_crossing_rate(y=y, **framing).T.flatten()
	    energy = librosa.feature.rms(y=y, **framing).T.flatten()

	    is_breath = (crossing_rate > threshold_zcr) & (energy > threshold_rmse)
	    return np.where(is_breath, 1, 0)

	# Feature extraction
	def extract_features(file_path):
	    """Build the stacked feature matrix for one audio file.

	    The file is loaded at its native sample rate and three features are
	    computed: 13 MFCCs, a dB-scaled mel spectrogram with ``n_mels`` bands,
	    and the breath flags from ``extract_breath_features``. Each one is
	    padded or truncated to ``max_len`` frames and the results are stacked
	    row-wise in that order.

	    Args:
	        file_path: Path of the audio file to analyse.

	    Returns:
	        The stacked feature array, or ``None`` if anything fails; the
	        failure is shown to the user through ``st.error``.
	    """
	    try:
	        signal, rate = librosa.load(file_path, sr=None)

	        # NOTE(review): melspectrogram yields a power spectrogram by default,
	        # for which power_to_db is the usual conversion. Left as-is because
	        # the bundled model presumably expects these exact values - confirm.
	        layers = (
	            librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=13),
	            librosa.amplitude_to_db(
	                librosa.feature.melspectrogram(y=signal, sr=rate, n_mels=n_mels)
	            ),
	            extract_breath_features(signal, rate),
	        )

	        # The frame axis is the last axis of every layer (2-D or 1-D).
	        return np.vstack(
	            [librosa.util.fix_length(layer, size=max_len, axis=-1) for layer in layers]
	        )
	    except Exception as e:
	        st.error(f"Error processing audio: {e}")
	        return None

	# Prepare input for model
	def prepare_single_data(features):
	    """Turn one feature matrix into a float32 model input.

	    Args:
	        features: Feature array whose axis 1 is the frame axis.

	    Returns:
	        A float32 array with axis 1 fixed to ``max_len`` and with a leading
	        batch axis and a trailing channel axis added.
	    """
	    fixed = librosa.util.fix_length(features, size=max_len, axis=1)
	    as_float = fixed.astype(np.float32)
	    # Leading axis = batch of one, trailing axis = single channel.
	    return as_float[None, ..., None]

	# Predict audio class
	def predict_audio(file_path):
	    """Run the TFLite model on one audio file.

	    Args:
	        file_path: Path of the audio file to classify.

	    Returns:
	        ``(predicted_class, scores)`` where ``predicted_class`` is the index
	        of the highest score and ``scores`` is the model output for the
	        single batch item, or ``(None, None)`` when feature extraction
	        failed.
	    """
	    features = extract_features(file_path)
	    if features is None:
	        return None, None

	    model_input = prepare_single_data(features)
	    interpreter.set_tensor(input_details[0]['index'], model_input)
	    interpreter.invoke()

	    scores = interpreter.get_tensor(output_details[0]['index'])
	    best_class = np.argmax(scores, axis=1)[0]
	    return best_class, scores[0]

	# Record audio function
	def record_audio(duration=5, samplerate=22050):
	    """Record one channel of int16 audio with sounddevice.

	    Status messages are shown in the Streamlit page before and after the
	    recording, and the call blocks until the recording has finished.

	    Args:
	        duration: Recording length in seconds.
	        samplerate: Sampling rate in Hz.

	    Returns:
	        ``(audio_data, samplerate)``: the recorded int16 samples and the
	        sampling rate they were captured at.
	    """
	    st.info(f"🎤 Recording for {duration} seconds...")

	    frame_count = int(duration * samplerate)
	    audio_data = sd.rec(
	        frame_count,
	        samplerate=samplerate,
	        channels=1,
	        dtype=np.int16,
	    )
	    sd.wait()  # sd.rec returns immediately; wait for the capture to finish

	    st.success("✅ Recording Complete!")
	    return audio_data, samplerate

	# Save recorded audio as .wav
	def save_wav(file_path, audio_data, samplerate):
	    """Write audio samples to ``file_path`` as a 16-bit PCM WAV file.

	    Args:
	        file_path: Destination path of the WAV file.
	        audio_data: Array-like of samples shaped ``(frames,)`` or
	            ``(frames, channels)``. int16 samples are written unchanged;
	            floating-point samples are treated as normalised to
	            [-1.0, 1.0], clipped to that range and scaled to int16.
	        samplerate: Sampling rate in Hz stored in the WAV header.

	    Raises:
	        TypeError: If ``audio_data`` is neither int16 nor floating point.
	    """
	    samples = np.asarray(audio_data)

	    if np.issubdtype(samples.dtype, np.floating):
	        # Raw float bytes written under a 2-byte sample width decode as
	        # noise, so convert to the int16 range the header advertises.
	        samples = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
	    elif not (samples.dtype.kind == 'i' and samples.dtype.itemsize == 2):
	        raise TypeError(
	            f"save_wav expects int16 or floating-point samples, got {samples.dtype}"
	        )

	    # Rows are frames and columns are channels, so the C-order byte dump is
	    # already interleaved frame by frame.
	    channels = samples.shape[1] if samples.ndim == 2 else 1

	    with wave.open(file_path, 'wb') as wf:
	        wf.setnchannels(channels)
	        wf.setsampwidth(2)
	        wf.setframerate(samplerate)
	        # Hand over native-order int16; the wave module takes care of the
	        # on-disk byte order itself.
	        wf.writeframes(samples.astype(np.int16, copy=False).tobytes())

	# Streamlit UI
	st.title('🎙️ Audio Deepfake Detection')
	st.write('Upload or record an audio file to classify it as real or fake.')

	# File uploader
	# NOTE(review): MP3 uploads are accepted here but are written to a ".wav"
	# path and played back as "audio/wav" below - confirm decoding copes.
	uploaded_file = st.file_uploader('📂 Upload an audio file', type=['wav', 'mp3'])
	recorded_file_path = "recorded_audio.wav"

	# The duration slider is rendered on every run, ahead of the button.
	# st.button is only True during the single rerun triggered by the click, so
	# a slider created inside that branch would show up after recording had
	# already started with the default value and vanish on the next rerun.
	duration = st.slider("⏳ Set Duration (seconds)", 1, 10, 5)

	# Record audio button
	if st.button("🎤 Record Live Audio"):
	    audio_data, samplerate = record_audio(duration)
	    save_wav(recorded_file_path, audio_data, samplerate)
	    st.audio(recorded_file_path, format="audio/wav")

	# Process uploaded or recorded audio
	# An upload takes priority; otherwise fall back to a recording left on disk.
	if uploaded_file is not None:
	    with open("uploaded_audio.wav", 'wb') as f:
	        f.write(uploaded_file.getbuffer())
	    file_path = "uploaded_audio.wav"
	    st.audio(file_path, format="audio/wav")
	elif os.path.exists(recorded_file_path):
	    file_path = recorded_file_path
	else:
	    file_path = None

	# Run prediction
	if file_path:
	    prediction, probability = predict_audio(file_path)
	    if prediction is not None:
	        # Score index 0 is reported as "Real" and index 1 as "Fake".
	        st.write(f'Predicted Class: {prediction}')
	        st.write(f'Probability of being Real: {probability[0]*100:.2f}%')
	        st.write(f'Probability of being Fake: {probability[1]*100:.2f}%')
	    else:
	        st.error("❌ Failed to process the audio file.")