# ─── streamlit_app.py ────────────────────────────────────────────────────────
"""Streamlit front end for a keystroke-dynamics subject classifier.

The page lets a user paste one row of keystroke timing values, parses it into
a one-row DataFrame, runs it through a saved preprocessor and Keras model, and
shows the predicted subject together with the per-class probabilities.
"""
from io import StringIO

import joblib
import numpy as np
import pandas as pd
import streamlit as st
import tensorflow as tf

# ─── MUST BE FIRST ───────────────────────────────────────────────────────────
# Page configuration is issued before any other Streamlit command in the
# script; the imports above do not emit Streamlit commands.
st.set_page_config(page_title="Keystroke Dynamics Auth", layout="wide")

# Bookkeeping columns that may accompany a row but are never fed to the model.
_NON_FEATURE_COLUMNS = ("subject", "sessionIndex", "rep")

_INTRO_MD = """
This app uses **keystroke timing data** to identify who typed a password based on their typing rhythm.

**How it works:** You paste one row of keystroke data (copied from Excel or CSV), and the system predicts the subject who likely typed it.
"""

_DATA_FORMAT_MD = """
Each row includes 34 values:

- First 3 columns (optional): `subject` (ID like s002), `sessionIndex` (1-8), and `rep` (1-50)
- Next 31 columns: Hold times (how long each key was pressed) and latency features:
    - `H.key`: time a key was held
    - `DD.key1.key2`: time between pressing key1 and key2
    - `UD.key1.key2`: time between releasing key1 and pressing key2

**Example row (tab or comma‑separated):**

```
s002 1 1 0.1491 0.3979 0.2488 ... 0.0742
```

You can copy this directly from Excel or a CSV file.
"""


# ─── Caching loaders so they only run once per session ───────────────────────
# NOTE: joblib.load unpickles its input; only point these loaders at artifact
# files you trust.
@st.cache_resource
def load_preprocessor(path="preprocessor.pkl"):
    """Load the fitted preprocessing object from *path* (cached by Streamlit)."""
    return joblib.load(path)


@st.cache_resource
def load_label_encoder(path="label_encoder.pkl"):
    """Load the fitted label encoder from *path* (cached by Streamlit)."""
    return joblib.load(path)


@st.cache_resource
def load_model(path="keystroke_dnn.h5"):
    """Load the saved Keras model from *path* (cached by Streamlit)."""
    return tf.keras.models.load_model(path)


# ─── Internal helpers ────────────────────────────────────────────────────────
def _feature_columns(preprocessor):
    """Return the list of feature column names stored on the preprocessor."""
    # NOTE(review): presumably a fitted scikit-learn ColumnTransformer whose
    # first transformer covers every model input — confirm against training.
    return list(preprocessor.transformers_[0][2])


def _class_labels(label_encoder):
    """Return the encoder's known labels as a NumPy array.

    Encoders exposing ``categories_`` (a list with one array per encoded
    column) are read from its first entry; otherwise ``classes_`` is used, so
    a plain label encoder works as well.
    """
    categories = getattr(label_encoder, "categories_", None)
    if categories is not None:
        return np.asarray(categories[0])
    return np.asarray(label_encoder.classes_)


def _detect_separator(text):
    """Pick the field separator for a pasted row.

    Tab wins when present (Excel copy), then comma (CSV); a row containing
    neither is split on runs of whitespace, which matches the space-separated
    example shown in the help text.
    """
    if "\t" in text:
        return "\t"
    if "," in text:
        return ","
    return r"\s+"


# ─── Prediction helper ───────────────────────────────────────────────────────
def predict_subjects(df_raw):
    """Predict the subject for every row of *df_raw*.

    Args:
        df_raw: DataFrame holding the feature columns by name; the optional
            ``subject``, ``sessionIndex`` and ``rep`` columns are ignored.

    Returns:
        A DataFrame with a ``Predicted Subject`` column followed by one
        ``Probability <label>`` column per known label.

    Raises:
        KeyError: if a required feature column is absent from *df_raw*.
    """
    preprocessor = load_preprocessor()
    label_encoder = load_label_encoder()
    model = load_model()

    # Drop non-feature columns if present, then select the features in the
    # order recorded on the preprocessor (raises KeyError when one is missing).
    df_features = df_raw.drop(columns=list(_NON_FEATURE_COLUMNS), errors="ignore")
    df_features = df_features[_feature_columns(preprocessor)]

    X_scaled = preprocessor.transform(df_features)
    y_prob = model.predict(X_scaled)
    labels = _class_labels(label_encoder)

    # Assemble all columns up front rather than inserting them one at a time.
    columns = {"Predicted Subject": labels[np.argmax(y_prob, axis=1)]}
    columns.update(
        {f"Probability {cls}": y_prob[:, i] for i, cls in enumerate(labels)}
    )
    return pd.DataFrame(columns)


# ─── Streamlit App ───────────────────────────────────────────────────────────
def main():
    """Render the page: help text, input box, and the prediction on demand."""
    st.title("🔐 Keystroke Dynamics Authenticator")
    st.markdown(_INTRO_MD)

    with st.expander("ℹ️ About the data format", expanded=False):
        st.markdown(_DATA_FORMAT_MD)

    # Load features list
    feature_cols = _feature_columns(load_preprocessor())
    st.markdown("### 🧾 Feature Format")
    st.code(", ".join(feature_cols), language="text")

    # Textarea for user input
    input_text = st.text_area(
        "Paste **one row** of keystroke timing data below:",
        placeholder="s002\t1\t1\t0.1491\t0.3979\t0.2488\t... (31 timing values)",
        height=140,
    )

    if not st.button("🔍 Predict"):
        return

    row_text = input_text.strip()
    if not row_text:
        st.warning("⚠️ Please paste a row of values to proceed.")
        return

    # The separator is chosen from the trimmed text so that stray leading or
    # trailing whitespace cannot influence the choice.
    try:
        df_input = pd.read_csv(
            StringIO(row_text),
            header=None,
            sep=_detect_separator(row_text),
        )
    except Exception as e:  # UI boundary: report any parser failure to the user
        st.error(f"❌ Could not parse input: {e}")
        return

    # Trim leading columns until the number matches feature_cols
    surplus = df_input.shape[1] - len(feature_cols)
    if surplus > 0:
        df_input = df_input.iloc[:, surplus:].copy()

    if df_input.shape[1] != len(feature_cols):
        st.error(
            f"❌ Expected {len(feature_cols)} feature values, "
            f"but got {df_input.shape[1]}."
        )
        return
    df_input.columns = feature_cols

    if df_input.shape[0] != 1:
        st.error(f"❌ Expected exactly 1 row, but got {df_input.shape[0]}.")
        return

    st.success("✅ Row parsed successfully!")
    st.write("### 🔍 Parsed Input")
    st.dataframe(df_input, use_container_width=True)

    try:
        df_pred = predict_subjects(df_input)
    except KeyError as e:
        st.error(f"Missing feature (typo?): {e}")
    except Exception as e:  # UI boundary: surface model/preprocessing failures
        st.error(f"Prediction error: {e}")
    else:
        st.write("### 🎯 Prediction Result")
        st.dataframe(df_pred, use_container_width=True)


if __name__ == "__main__":
    main()