# ─── streamlit_app.py ────────────────────────────────────────────────────────
import streamlit as st
# ─── MUST BE FIRST ───────────────────────────────────────────────────────────
st.set_page_config(page_title="Keystroke Dynamics Auth", layout="wide")
import pandas as pd
import numpy as np
import joblib
import tensorflow as tf
from io import StringIO
# ─── Caching loaders so they only run once per session ──────────────────────
@st.cache_resource
def load_preprocessor(path="preprocessor.pkl"):
    return joblib.load(path)
@st.cache_resource
def load_label_encoder(path="label_encoder.pkl"):
    return joblib.load(path)
@st.cache_resource
def load_model(path="keystroke_dnn.h5"):
    return tf.keras.models.load_model(path)
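
# NOTE (assumption): the pickled artifacts above are expected to look roughly like
# the training-side sketch below -- a scikit-learn ColumnTransformer whose first
# transformer was fitted on the 31 timing columns (so transformers_[0][2] yields
# the feature names), plus an encoder exposing `categories_` (e.g. OrdinalEncoder)
# saved as "label_encoder.pkl". Illustrative only; `timing_cols` and `train_df`
# are placeholder names, not part of this app:
#
#   from sklearn.compose import ColumnTransformer
#   from sklearn.preprocessing import StandardScaler, OrdinalEncoder
#   pre = ColumnTransformer([("scale", StandardScaler(), timing_cols)]).fit(train_df)
#   enc = OrdinalEncoder().fit(train_df[["subject"]])
#   joblib.dump(pre, "preprocessor.pkl"); joblib.dump(enc, "label_encoder.pkl")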
# ─── Prediction helper ───────────────────────────────────────────────────────
def predict_subjects(df_raw):
    preprocessor = load_preprocessor()
    label_encoder = load_label_encoder()
    model = load_model()

    # Drop non-feature columns if present
    for c in ("subject", "sessionIndex", "rep"):
        if c in df_raw.columns:
            df_raw = df_raw.drop(columns=[c])

    # Re-order and validate features
    feature_cols = preprocessor.transformers_[0][2]
    df_features = df_raw[feature_cols]

    # Predict
    X_scaled = preprocessor.transform(df_features)
    y_prob = model.predict(X_scaled)
    idx_pred = np.argmax(y_prob, axis=1)
    labels = label_encoder.categories_[0][idx_pred]

    # Build result
    df_out = pd.DataFrame({"Predicted Subject": labels})
    for i, cls in enumerate(label_encoder.categories_[0]):
        df_out[f"Probability {cls}"] = y_prob[:, i]
    return df_out
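
# Example usage outside Streamlit (illustrative only): build one synthetic row
# with the fitted feature names and pass it through the helper. The random
# values are placeholders, not real keystroke timings:
#
#   cols = load_preprocessor().transformers_[0][2]
#   row = pd.DataFrame([np.random.rand(len(cols))], columns=cols)
#   print(predict_subjects(row))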
# ─── Streamlit App ───────────────────────────────────────────────────────────
def main():
    st.title("🔐 Keystroke Dynamics Authenticator")
    st.markdown("""
This app uses **keystroke timing data** to identify who typed a password based on their typing rhythm.

**How it works:**
You paste one row of keystroke data (copied from Excel or a CSV), and the system predicts the subject who most likely typed it.
""")

    with st.expander("ℹ️ About the data format", expanded=False):
        st.markdown("""
Each row includes 34 values:

- First 3 columns (optional):
  `subject` (an ID like s002), `sessionIndex` (1-8), and `rep` (1-50)
- Next 31 columns:
  hold times (how long each key was pressed) and latency features:
  - `H.key`: time a key was held down
  - `DD.key1.key2`: time between pressing key1 and pressing key2
  - `UD.key1.key2`: time between releasing key1 and pressing key2

**Example row (tab- or comma-separated):**
```
s002 1 1 0.1491 0.3979 0.2488 ... 0.0742
```
You can copy this directly from Excel or a CSV file.
""")

    # Load the feature list from the fitted preprocessor
    preprocessor = load_preprocessor()
    feature_cols = preprocessor.transformers_[0][2]

    st.markdown("### 🧾 Feature Format")
    st.code(", ".join(feature_cols), language="text")

    # Text area for user input
    input_text = st.text_area(
        "Paste **one row** of keystroke timing data below:",
        placeholder="s002\t1\t1\t0.1491\t0.3979\t0.2488\t... (31 timing values)",
        height=140,
    )

    if st.button("🔍 Predict"):
        if not input_text.strip():
            st.warning("⚠️ Please paste a row of values to proceed.")
            return

        try:
            # Use tab as the delimiter if present, else comma
            delimiter = "\t" if "\t" in input_text else ","
            df_input = pd.read_csv(StringIO(input_text.strip()), header=None, sep=delimiter)

            # Trim leading columns (subject, sessionIndex, rep) until the count matches feature_cols
            while df_input.shape[1] > len(feature_cols):
                df_input = df_input.drop(columns=[df_input.columns[0]])

            if df_input.shape[1] != len(feature_cols):
                st.error(f"❌ Expected {len(feature_cols)} feature values, but got {df_input.shape[1]}.")
                return

            df_input.columns = feature_cols
        except Exception as e:
            st.error(f"❌ Could not parse input: {e}")
            return

        if df_input.shape[0] != 1:
            st.error(f"❌ Expected exactly 1 row, but got {df_input.shape[0]}.")
            return

        st.success("✅ Row parsed successfully!")
        st.write("### 📋 Parsed Input")
        st.dataframe(df_input, use_container_width=True)

        try:
            df_pred = predict_subjects(df_input)
            st.write("### 🎯 Prediction Result")
            st.dataframe(df_pred, use_container_width=True)
        except KeyError as e:
            st.error(f"Missing feature (typo?): {e}")
        except Exception as e:
            st.error(f"Prediction error: {e}")
if __name__ == "__main__":
    main()
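
# Run locally with:
#   streamlit run streamlit_app.py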