# streamlit_app.py
"""Streamlit front-end that runs a saved keystroke-dynamics DNN on a CSV.

The app loads three artefacts from the working directory (a fitted
preprocessor, a fitted label encoder and a Keras model), lets the user upload
a CSV of keystroke features, and displays the predicted subject for every
row together with the per-class scores returned by the model.
"""
import streamlit as st
import pandas as pd
import numpy as np
import joblib
import tensorflow as tf

# Bookkeeping columns that may be present in an upload but are not features.
_NON_FEATURE_COLS = ("subject", "sessionIndex", "rep")

# ─── Caching loaders so they only run once ───────────────────────────────────
# `st.cache` is deprecated since Streamlit 1.18; `st.cache_resource` is the
# replacement for global, unserialisable objects such as models. Fall back to
# the legacy decorator so the app keeps working on older Streamlit installs.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is None:
    _cache_resource = st.cache(allow_output_mutation=True)


@_cache_resource
def load_preprocessor(path="preprocessor.pkl"):
    """Load the fitted preprocessor from *path* (cached across reruns).

    NOTE: ``joblib.load`` unpickles the file, so only point this at trusted
    artefacts.
    """
    return joblib.load(path)


@_cache_resource
def load_label_encoder(path="label_encoder.pkl"):
    """Load the fitted label encoder from *path* (cached across reruns).

    NOTE: ``joblib.load`` unpickles the file, so only point this at trusted
    artefacts.
    """
    return joblib.load(path)


@_cache_resource
def load_model(path="keystroke_dnn.h5"):
    """Load the Keras model stored at *path* (cached across reruns)."""
    return tf.keras.models.load_model(path)


# ─── Prediction function ────────────────────────────────────────────────────
def predict_subjects(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Predict a subject label for every row of *df_raw*.

    Drops any 'subject'/'sessionIndex'/'rep' columns, selects the columns
    registered on the preprocessor's first transformer, transforms them,
    runs the model, and decodes the arg-max class of each row.

    Args:
        df_raw: Raw keystroke features, one row per sample. The caller's
            frame is not modified.

    Returns:
        A DataFrame with a ``predicted_subject`` column followed by one
        ``prob_<class>`` column per entry of the encoder's first category
        array, holding the model output for that class.

    Raises:
        KeyError: If *df_raw* lacks a column the preprocessor expects.
    """
    preprocessor = load_preprocessor()
    label_encoder = load_label_encoder()
    model = load_model()

    # 1) Drop unused cols if present (errors="ignore" skips absent ones)
    df_candidates = df_raw.drop(columns=list(_NON_FEATURE_COLS), errors="ignore")

    # 2) Re-order columns exactly as recorded on the fitted preprocessor
    feature_cols = preprocessor.transformers_[0][2]
    df_features = df_candidates[feature_cols]

    # 3) Scale
    X_scaled = preprocessor.transform(df_features)

    # 4) Model inference
    y_prob = model.predict(X_scaled)
    idx_pred = np.argmax(y_prob, axis=1)

    # 5) Decode class indices back to original labels
    classes = label_encoder.categories_[0]

    # 6) Build output in a single construction rather than inserting the
    #    probability columns one at a time.
    columns = {"predicted_subject": classes[idx_pred]}
    columns.update(
        {f"prob_{cls}": y_prob[:, i] for i, cls in enumerate(classes)}
    )
    return pd.DataFrame(columns)


# ─── Streamlit UI ──────────────────────────────────────────────────────────
def main():
    """Render the upload page and show predictions for the uploaded CSV."""
    st.set_page_config(page_title="Keystroke Dynamics Auth", layout="wide")
    st.title("🔑 Keystroke Dynamics Authentication")
    st.markdown(
        # Two trailing spaces + newline is a Markdown hard line break.
        "Upload a CSV of raw keystroke‐feature vectors (one row per sample).  \n"
        "The app will drop any `subject`/`sessionIndex`/`rep` columns, scale, "
        "run through the DNN, and return predicted subject IDs + confidence scores."
    )

    uploaded = st.file_uploader("Choose CSV file", type="csv")
    if uploaded is None:
        return

    # An empty or malformed upload should produce a readable message instead
    # of an unhandled traceback.
    try:
        df = pd.read_csv(uploaded)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
        st.error(f"Could not read the uploaded CSV: {e}")
        return

    st.write("### Raw feature preview (first 5 rows)")
    st.dataframe(df.head(), use_container_width=True)

    # A header-only file has nothing to predict on; stop before the model.
    if df.empty:
        st.warning("The uploaded CSV contains no rows to predict on.")
        return

    try:
        df_preds = predict_subjects(df)
    except KeyError as e:
        st.error(f"Missing expected feature column: {e}")
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user.
        st.error(f"Error during prediction: {e}")
    else:
        st.write("### Predictions")
        st.dataframe(df_preds, use_container_width=True)


if __name__ == "__main__":
    main()