Spaces:

MrUtakata
/

kda

Sleeping

App Files Files Community

kda / app.py

MrUtakata

Create app.py

f7f3a00 verified 8 months ago

raw

history blame

3.21 kB

	# app.py — Streamlit front end for keystroke-dynamics subject prediction
	import streamlit as st
	import pandas as pd
	import numpy as np
	import joblib
	import tensorflow as tf

	# ─── Caching loaders so they only run once ───────────────────────────────────
	@st.cache_resource
	def load_preprocessor(path="preprocessor.pkl"):
	    """Load the fitted preprocessor from disk and cache it.

	    Uses ``st.cache_resource`` instead of the deprecated
	    ``st.cache(allow_output_mutation=True)``; both hand back the same
	    cached object on repeated calls with the same ``path`` rather than
	    reloading the file on every Streamlit rerun.

	    Args:
	        path: Location of the joblib-serialized preprocessor.

	    Returns:
	        Whatever object was serialized into ``path``.
	    """
	    # NOTE: joblib.load unpickles the file and can execute arbitrary code;
	    # only point ``path`` at artifacts you trust.
	    return joblib.load(path)

	@st.cache_resource
	def load_label_encoder(path="label_encoder.pkl"):
	    """Load the fitted label encoder from disk and cache it.

	    Uses ``st.cache_resource`` instead of the deprecated
	    ``st.cache(allow_output_mutation=True)``; both hand back the same
	    cached object on repeated calls with the same ``path`` rather than
	    reloading the file on every Streamlit rerun.

	    Args:
	        path: Location of the joblib-serialized encoder.

	    Returns:
	        Whatever object was serialized into ``path``.
	    """
	    # NOTE: joblib.load unpickles the file and can execute arbitrary code;
	    # only point ``path`` at artifacts you trust.
	    return joblib.load(path)

	@st.cache_resource
	def load_model(path="keystroke_dnn.h5"):
	    """Load the trained Keras model from disk and cache it.

	    Uses ``st.cache_resource`` instead of the deprecated
	    ``st.cache(allow_output_mutation=True)``; both hand back the same
	    cached model object on repeated calls with the same ``path`` rather
	    than reloading the file on every Streamlit rerun.

	    Args:
	        path: Location of the saved Keras model file.

	    Returns:
	        The model returned by ``tf.keras.models.load_model``.
	    """
	    return tf.keras.models.load_model(path)

	# ─── Prediction function ────────────────────────────────────────────────────
	def predict_subjects(df_raw):
	    """
	    Predict a subject for every row of a raw keystroke-feature DataFrame.

	    Any 'subject'/'sessionIndex'/'rep' columns are discarded, the
	    remaining columns are selected in the order recorded on the
	    preprocessor's first transformer, the preprocessor is applied, and
	    the model scores each row.

	    Returns a DataFrame with a 'predicted_subject' column followed by one
	    'prob_<class>' column per entry of the encoder's first category list.
	    Raises KeyError when a column expected by the preprocessor is missing.
	    """
	    preprocessor = load_preprocessor()
	    label_encoder = load_label_encoder()
	    model = load_model()

	    # Strip bookkeeping columns in one call, only if any are present.
	    bookkeeping = [
	        name
	        for name in ("subject", "sessionIndex", "rep")
	        if name in df_raw.columns
	    ]
	    if bookkeeping:
	        df_raw = df_raw.drop(columns=bookkeeping)

	    # Column list stored on the first transformer fixes the feature order.
	    ordered = df_raw[preprocessor.transformers_[0][2]]

	    # Transform, then score with the network.
	    scores = model.predict(preprocessor.transform(ordered))
	    best = np.argmax(scores, axis=1)

	    # Map winning indices back to the encoder's class values.
	    classes = label_encoder.categories_[0]

	    # Assemble the result: predicted label first, then per-class scores.
	    columns = {"predicted_subject": classes[best]}
	    columns.update(
	        (f"prob_{cls}", scores[:, pos]) for pos, cls in enumerate(classes)
	    )
	    return pd.DataFrame(columns)

	# ─── Streamlit UI ──────────────────────────────────────────────────────────
	def main():
	    """Render the upload page and show predictions for the uploaded CSV.

	    Flow: configure the page, wait for a CSV upload, preview its first
	    rows, then run ``predict_subjects`` and display the result. A file
	    that cannot be parsed as CSV and any failure during prediction are
	    both reported with ``st.error`` instead of an unhandled traceback.
	    """
	    st.set_page_config(page_title="Keystroke Dynamics Auth", layout="wide")
	    st.title("🔑 Keystroke Dynamics Authentication")
	    st.markdown(
	        "Upload a CSV of raw keystroke‐feature vectors (one row per sample). "
	        "The app will drop any `subject`/`sessionIndex`/`rep` columns, scale, "
	        "run through the DNN, and return predicted subject IDs + confidence scores."
	    )

	    uploaded = st.file_uploader("Choose CSV file", type="csv")
	    if not uploaded:
	        # Nothing uploaded yet; there is nothing to predict on.
	        return

	    # Parsing is guarded separately so an empty, malformed or mis-encoded
	    # upload gets a readable message rather than a raw traceback.
	    try:
	        df = pd.read_csv(uploaded)
	    except (
	        pd.errors.EmptyDataError,
	        pd.errors.ParserError,
	        UnicodeDecodeError,
	    ) as e:
	        st.error(f"Could not read the uploaded CSV: {e}")
	        return

	    st.write("### Raw feature preview (first 5 rows)")
	    st.dataframe(df.head(), use_container_width=True)

	    try:
	        df_preds = predict_subjects(df)
	        st.write("### Predictions")
	        st.dataframe(df_preds, use_container_width=True)
	    except KeyError as e:
	        # Raised when the upload lacks a column the preprocessor selects.
	        st.error(f"Missing expected feature column: {e}")
	    except Exception as e:
	        # Top-level UI boundary: show any other failure to the user.
	        st.error(f"Error during prediction: {e}")

	# Script entry point: build the UI only when this file is executed as the
	# main module, not when it is imported.
	if __name__ == "__main__":
	main()