kda / app.py
MrUtakata's picture
Create app.py
f7f3a00 verified
raw
history blame
3.21 kB
# streamlit_app.py
import streamlit as st
import pandas as pd
import numpy as np
import joblib
import tensorflow as tf
# ─── Caching loaders so they only run once ───────────────────────────────────
@st.cache(allow_output_mutation=True)
def load_preprocessor(path="preprocessor.pkl"):
    """Load the fitted preprocessing object from a joblib file.

    The Streamlit cache decorator keeps the loaded object around so the
    file is not read again on every script rerun.

    Args:
        path: Location of the joblib-serialized preprocessor.

    Returns:
        The object that ``joblib.load`` deserializes from ``path``.
    """
    # NOTE: joblib files are pickle-based; only load artifacts you trust.
    preprocessor = joblib.load(path)
    return preprocessor
@st.cache(allow_output_mutation=True)
def load_label_encoder(path="label_encoder.pkl"):
    """Load the fitted label encoder from a joblib file.

    The Streamlit cache decorator keeps the loaded object around so the
    file is not read again on every script rerun.

    Args:
        path: Location of the joblib-serialized encoder.

    Returns:
        The object that ``joblib.load`` deserializes from ``path``.
    """
    # NOTE: joblib files are pickle-based; only load artifacts you trust.
    encoder = joblib.load(path)
    return encoder
@st.cache(allow_output_mutation=True)
def load_model(path="keystroke_dnn.h5"):
    """Load the trained Keras model from disk.

    The Streamlit cache decorator keeps the loaded model around so it is
    not rebuilt on every script rerun.

    Args:
        path: Location of the saved Keras model file.

    Returns:
        The model returned by ``tf.keras.models.load_model``.
    """
    keras_model = tf.keras.models.load_model(path)
    return keras_model
# ─── Prediction function ────────────────────────────────────────────────────
def predict_subjects(df_raw):
    """
    Predict a subject label for every row of raw keystroke features.

    Bookkeeping columns ('subject', 'sessionIndex', 'rep') are removed when
    present, the remaining columns are selected in the order recorded in the
    preprocessor's first transformer, transformed, and fed to the DNN.

    Returns a DataFrame whose first column is 'predicted_subject', followed
    by one 'prob_<class>' column per entry of the label encoder's first
    category array.  Selecting the feature columns raises KeyError when an
    expected column is missing from the input.
    """
    scaler = load_preprocessor()
    encoder = load_label_encoder()
    dnn = load_model()

    # 1) Discard bookkeeping columns; names that are absent are skipped.
    candidates = df_raw.drop(
        columns=["subject", "sessionIndex", "rep"], errors="ignore"
    )

    # 2) Pick the training-time columns, in training-time order.
    ordered = candidates[scaler.transformers_[0][2]]

    # 3) + 4) Apply the fitted preprocessing, then run the network.
    probabilities = dnn.predict(scaler.transform(ordered))

    # 5) The highest-scoring output index selects the class label.
    classes = encoder.categories_[0]
    winners = np.argmax(probabilities, axis=1)

    # 6) Assemble every output column up front and build the frame once.
    columns = {"predicted_subject": classes[winners]}
    for position, cls in enumerate(classes):
        columns[f"prob_{cls}"] = probabilities[:, position]
    return pd.DataFrame(columns)
# ─── Streamlit UI ──────────────────────────────────────────────────────────
def main():
    """Render the Streamlit page: upload a CSV, preview it, show predictions.

    Flow:
      1. Configure the page and show the title and instructions.
      2. Wait for a CSV upload; do nothing further until one is provided.
      3. Parse the upload, reporting unreadable or empty files to the user
         instead of letting the exception escape as a traceback.
      4. Show the first rows, run ``predict_subjects``, and display the
         resulting table, or an error message if prediction fails.
    """
    st.set_page_config(page_title="Keystroke Dynamics Auth", layout="wide")
    # The title previously contained the mojibake form of the key emoji.
    st.title("🔑 Keystroke Dynamics Authentication")
    st.markdown(
        "Upload a CSV of raw keystroke‐feature vectors (one row per sample). "
        "The app will drop any `subject`/`sessionIndex`/`rep` columns, scale, "
        "run through the DNN, and return predicted subject IDs + confidence scores."
    )

    uploaded = st.file_uploader("Choose CSV file", type="csv")
    if not uploaded:
        return

    # Parsing is the first place user-supplied data can fail; surface the
    # problem in the page rather than as an unhandled exception.
    try:
        df = pd.read_csv(uploaded)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as e:
        st.error(f"Could not read the uploaded CSV: {e}")
        return

    # A header-only file parses fine but leaves nothing to predict on.
    if df.empty:
        st.warning("The uploaded CSV contains no data rows.")
        return

    st.write("### Raw feature preview (first 5 rows)")
    st.dataframe(df.head(), use_container_width=True)

    try:
        df_preds = predict_subjects(df)
    except KeyError as e:
        st.error(f"Missing expected feature column: {e}")
    except Exception as e:
        # Top-level UI boundary: show any other failure to the user.
        st.error(f"Error during prediction: {e}")
    else:
        st.write("### Predictions")
        st.dataframe(df_preds, use_container_width=True)
# Script entry point: build the UI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()