MrUtakata committed on
Commit
f7f3a00
·
verified ·
1 Parent(s): 44147a0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # streamlit_app.py
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import numpy as np
5
+ import joblib
6
+ import tensorflow as tf
7
+
8
# ─── Caching loaders so they only run once ───────────────────────────────────
# st.cache(allow_output_mutation=True) is deprecated (and removed in recent
# Streamlit releases); st.cache_resource is the modern API for caching
# unserializable singletons such as fitted sklearn objects.
@st.cache_resource
def load_preprocessor(path="preprocessor.pkl"):
    """Load and cache the fitted preprocessing pipeline.

    Args:
        path: Filesystem path to the joblib-pickled preprocessor.

    Returns:
        The unpickled preprocessor — presumably a sklearn ColumnTransformer,
        since predict_subjects reads its ``transformers_`` attribute
        (TODO confirm against training code).
    """
    return joblib.load(path)
12
+
13
@st.cache_resource  # replaces deprecated st.cache(allow_output_mutation=True)
def load_label_encoder(path="label_encoder.pkl"):
    """Load and cache the fitted target encoder.

    NOTE(review): downstream code reads ``categories_`` (a OneHotEncoder
    attribute) rather than ``classes_`` (a LabelEncoder attribute), so the
    pickle is likely a OneHotEncoder despite the filename — confirm against
    the training script.

    Args:
        path: Filesystem path to the joblib-pickled encoder.

    Returns:
        The unpickled encoder object.
    """
    return joblib.load(path)
16
+
17
@st.cache_resource  # replaces deprecated st.cache(allow_output_mutation=True)
def load_model(path="keystroke_dnn.h5"):
    """Load and cache the trained Keras DNN.

    Args:
        path: Filesystem path to the saved HDF5 Keras model.

    Returns:
        A ``tf.keras.Model`` ready for inference.
    """
    return tf.keras.models.load_model(path)
20
+
21
# ─── Prediction function ────────────────────────────────────────────────────
def predict_subjects(df_raw):
    """Predict subject IDs for a DataFrame of raw keystroke features.

    Any 'subject'/'sessionIndex'/'rep' columns are dropped, the remaining
    columns are re-ordered to the exact list the preprocessor saw at
    train-time, the rows are scaled and run through the DNN, and a DataFrame
    of predicted IDs plus per-class probabilities is returned.
    """
    scaler = load_preprocessor()
    encoder = load_label_encoder()
    dnn = load_model()

    # 1) Discard metadata columns; errors="ignore" skips any that are absent
    features = df_raw.drop(columns=["subject", "sessionIndex", "rep"], errors="ignore")

    # 2) Select columns in the exact train-time order, then 3) scale
    train_cols = scaler.transformers_[0][2]
    scaled = scaler.transform(features[train_cols])

    # 4) Model inference -> per-class probability matrix
    probs = dnn.predict(scaled)

    # 5) argmax over classes -> original labels via the encoder's categories
    classes = encoder.categories_[0]
    winners = np.argmax(probs, axis=1)

    # 6) One row per sample: predicted label plus a probability column per class
    predictions = pd.DataFrame({"predicted_subject": classes[winners]})
    for col_idx, cls in enumerate(classes):
        predictions[f"prob_{cls}"] = probs[:, col_idx]

    return predictions
58
+
59
# ─── Streamlit UI ──────────────────────────────────────────────────────────
def main():
    """Render the upload UI, run predictions, and display the results."""
    st.set_page_config(page_title="Keystroke Dynamics Auth", layout="wide")
    st.title("πŸ”‘ Keystroke Dynamics Authentication")
    st.markdown(
        "Upload a CSV of raw keystroke‐feature vectors (one row per sample). "
        "The app will drop any `subject`/`sessionIndex`/`rep` columns, scale, "
        "run through the DNN, and return predicted subject IDs + confidence scores."
    )

    csv_file = st.file_uploader("Choose CSV file", type="csv")
    if not csv_file:
        # Nothing uploaded yet — stop rendering here.
        return

    samples = pd.read_csv(csv_file)
    st.write("### Raw feature preview (first 5 rows)")
    st.dataframe(samples.head(), use_container_width=True)

    try:
        results = predict_subjects(samples)
        st.write("### Predictions")
        st.dataframe(results, use_container_width=True)
    except KeyError as missing:
        # A train-time feature column is absent from the upload.
        st.error(f"Missing expected feature column: {missing}")
    except Exception as err:
        st.error(f"Error during prediction: {err}")

if __name__ == "__main__":
    main()