# kda / app.py
# MrUtakata's picture
# Update app.py
# 8769ca6 verified
# ─── streamlit_app.py ────────────────────────────────────────────────────────
import streamlit as st
# ─── MUST BE FIRST ───────────────────────────────────────────────────────────
st.set_page_config(page_title="Keystroke Dynamics Auth", layout="wide")
import pandas as pd
import numpy as np
import joblib
import tensorflow as tf
from io import StringIO
# ─── Caching loaders so each artifact is loaded once and reused across reruns ─
@st.cache_resource
def load_preprocessor(path="preprocessor.pkl"):
    """Deserialize and return the preprocessing object stored at ``path``.

    The function is wrapped in ``st.cache_resource`` so the loaded object is
    reused instead of being read from disk again on every call.

    Args:
        path: Location of the joblib-serialized preprocessor.

    Returns:
        Whatever object ``joblib.load`` reconstructs from ``path``.
    """
    # NOTE(review): joblib.load unpickles the file; only load trusted artifacts.
    preprocessor_obj = joblib.load(path)
    return preprocessor_obj
@st.cache_resource
def load_label_encoder(path="label_encoder.pkl"):
    """Deserialize and return the label encoder stored at ``path``.

    The function is wrapped in ``st.cache_resource`` so the loaded object is
    reused instead of being read from disk again on every call.

    Args:
        path: Location of the joblib-serialized encoder.

    Returns:
        Whatever object ``joblib.load`` reconstructs from ``path``.
    """
    # NOTE(review): joblib.load unpickles the file; only load trusted artifacts.
    encoder_obj = joblib.load(path)
    return encoder_obj
@st.cache_resource
def load_model(path="keystroke_dnn.h5"):
    """Load the Keras model stored at ``path`` for inference.

    The function is wrapped in ``st.cache_resource`` so the loaded model is
    reused instead of being read from disk again on every call.

    Args:
        path: Location of the saved Keras model file.

    Returns:
        The model returned by ``tf.keras.models.load_model``, left uncompiled.
    """
    # The app only calls ``predict`` on this model, so the optimizer, loss and
    # metrics are not needed. Skipping compilation avoids restoring them and
    # avoids load failures for models saved with custom losses/metrics.
    return tf.keras.models.load_model(path, compile=False)
# ─── Prediction helper ───────────────────────────────────────────────────────
def predict_subjects(df_raw):
    """Predict the subject for each row of keystroke features.

    Args:
        df_raw: DataFrame holding the feature columns listed in the
            preprocessor's first transformer. The metadata columns
            ``subject``, ``sessionIndex`` and ``rep`` are dropped if present.

    Returns:
        A DataFrame with one row per input row: a ``Predicted Subject``
        column followed by one ``Probability <class>`` column per class.

    Raises:
        KeyError: If a required feature column is missing from ``df_raw``.
        ValueError: If the model output width does not match the number of
            classes known to the encoder.
    """
    preprocessor = load_preprocessor()
    label_encoder = load_label_encoder()
    model = load_model()

    # Metadata columns are optional; errors="ignore" skips any that are absent.
    df_candidates = df_raw.drop(
        columns=["subject", "sessionIndex", "rep"], errors="ignore"
    )

    # Select features in the order the preprocessor's first transformer lists
    # them; a missing column surfaces as KeyError for the caller to report.
    feature_cols = list(preprocessor.transformers_[0][2])
    df_features = df_candidates[feature_cols]

    X_scaled = preprocessor.transform(df_features)
    y_prob = np.asarray(model.predict(X_scaled))

    # OneHotEncoder/OrdinalEncoder expose ``categories_`` while LabelEncoder
    # exposes ``classes_``; accept either so the pickled encoder type does not
    # matter.
    categories = getattr(label_encoder, "categories_", None)
    classes = np.asarray(
        categories[0] if categories is not None else label_encoder.classes_
    )

    if y_prob.ndim != 2 or y_prob.shape[1] != len(classes):
        raise ValueError(
            f"Model output shape {y_prob.shape} does not match "
            f"{len(classes)} known classes."
        )

    idx_pred = np.argmax(y_prob, axis=1)

    # One probability column per class, with the predicted label placed first.
    df_out = pd.DataFrame(
        y_prob, columns=[f"Probability {cls}" for cls in classes]
    )
    df_out.insert(0, "Predicted Subject", classes[idx_pred])
    return df_out
# ─── Streamlit App ────────────────────────────────────────────────────────────
def _parse_keystroke_row(input_text, feature_cols):
    """Parse pasted text into a one-row numeric DataFrame of feature values.

    Args:
        input_text: Raw text pasted by the user; fields are separated by
            tabs, commas, or (when neither is present) runs of whitespace.
        feature_cols: Ordered feature names to assign to the parsed columns.

    Returns:
        A DataFrame with exactly one row and columns named ``feature_cols``.

    Raises:
        ValueError: If the text cannot be parsed, has the wrong number of
            values or rows, or contains non-numeric feature values. The
            message is written to be shown directly to the user.
    """
    text = input_text.strip()

    # Use tab if present, else comma; fall back to whitespace so rows copied
    # with plain spaces between values are not read as a single column.
    if "\t" in text:
        delimiter = "\t"
    elif "," in text:
        delimiter = ","
    else:
        delimiter = r"\s+"

    try:
        df = pd.read_csv(StringIO(text), header=None, sep=delimiter)
    except Exception as exc:  # UI boundary: report any parser failure to the user
        raise ValueError(f"Could not parse input: {exc}") from exc

    # Trim leading (metadata) columns until the count matches feature_cols.
    n_expected = len(feature_cols)
    surplus = df.shape[1] - n_expected
    if surplus > 0:
        df = df.iloc[:, surplus:]
    if df.shape[1] != n_expected:
        raise ValueError(
            f"Expected {n_expected} feature values, but got {df.shape[1]}."
        )
    df.columns = feature_cols

    if df.shape[0] != 1:
        raise ValueError(f"Expected exactly 1 row, but got {df.shape[0]}.")

    # Reject non-numeric values here rather than letting them fail later
    # inside the preprocessor with a less helpful message.
    try:
        df = df.apply(pd.to_numeric)
    except (ValueError, TypeError) as exc:
        raise ValueError(f"Feature values must be numeric: {exc}") from exc

    return df


def main():
    """Render the Streamlit page and run a prediction for one pasted row.

    Shows the introduction and data-format help, lists the expected feature
    names, and, when the Predict button is pressed, parses the pasted row and
    displays the parsed input and the prediction table. Parse and prediction
    failures are reported on the page instead of being raised.
    """
    st.title("🔐 Keystroke Dynamics Authenticator")
    st.markdown("""
This app uses **keystroke timing data** to identify who typed a password based on their typing rhythm.
**How it works:**
You paste one row of keystroke data (copied from Excel or CSV), and the system predicts the subject who likely typed it.
""")
    with st.expander("ℹ️ About the data format", expanded=False):
        st.markdown("""
Each row includes 34 values:
- First 3 columns (optional):
`subject` (ID like s002), `sessionIndex` (1-8), and `rep` (1-50)
- Next 31 columns:
Hold times (how long each key was pressed) and latency features:
- `H.key`: time a key was held
- `DD.key1.key2`: time between pressing key1 and key2
- `UD.key1.key2`: time between releasing key1 and pressing key2
**Example row (tab or comma‑separated):**
```
s002 1 1 0.1491 0.3979 0.2488 ... 0.0742
```
You can copy this directly from Excel or a CSV file.
""")

    # Load features list
    preprocessor = load_preprocessor()
    feature_cols = preprocessor.transformers_[0][2]
    st.markdown("### 🧾 Feature Format")
    st.code(", ".join(feature_cols), language="text")

    # Textarea for user input
    input_text = st.text_area(
        "Paste **one row** of keystroke timing data below:",
        placeholder="s002\t1\t1\t0.1491\t0.3979\t0.2488\t... (31 timing values)",
        height=140
    )

    # Nothing more to do until the user asks for a prediction.
    if not st.button("🔍 Predict"):
        return

    if not input_text.strip():
        st.warning("⚠️ Please paste a row of values to proceed.")
        return

    try:
        df_input = _parse_keystroke_row(input_text, feature_cols)
    except ValueError as e:
        st.error(f"❌ {e}")
        return

    st.success("✅ Row parsed successfully!")
    st.write("### 🔍 Parsed Input")
    st.dataframe(df_input, use_container_width=True)

    try:
        df_pred = predict_subjects(df_input)
        st.write("### 🎯 Prediction Result")
        st.dataframe(df_pred, use_container_width=True)
    except KeyError as e:
        st.error(f"Missing feature (typo?): {e}")
    except Exception as e:  # UI boundary: show the failure instead of crashing
        st.error(f"Prediction error: {e}")
# Script entry point: build the page only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()