File size: 5,410 Bytes
25d2c15
 
f7f3a00
25d2c15
015c6bc
25d2c15
 
f7f3a00
 
 
 
dc13330
f7f3a00
015c6bc
25d2c15
f7f3a00
 
 
25d2c15
f7f3a00
 
 
25d2c15
f7f3a00
 
 
015c6bc
f7f3a00
 
 
 
 
8769ca6
f7f3a00
 
 
 
8769ca6
f7f3a00
 
 
8769ca6
f7f3a00
 
 
 
 
8769ca6
 
f7f3a00
8769ca6
dc13330
f7f3a00
 
f00e385
f7f3a00
8769ca6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f00e385
 
015c6bc
8769ca6
dc13330
 
8769ca6
dc13330
8769ca6
 
 
dc13330
 
8769ca6
dc13330
8769ca6
dc13330
 
 
8769ca6
f00e385
 
8769ca6
f00e385
8769ca6
f00e385
 
 
 
8769ca6
f00e385
 
 
 
dc13330
8769ca6
dc13330
015c6bc
dc13330
8769ca6
dc13330
015c6bc
8769ca6
 
015c6bc
 
 
 
8769ca6
015c6bc
 
 
 
 
f7f3a00
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# ─── streamlit_app.py ────────────────────────────────────────────────────────

import streamlit as st

# ─── MUST BE FIRST ───────────────────────────────────────────────────────────
# The page configuration is applied before the remaining imports and before
# any other Streamlit call in this file; keep it ahead of all other st.* usage.
st.set_page_config(page_title="Keystroke Dynamics Auth", layout="wide")

import pandas as pd
import numpy as np
import joblib
import tensorflow as tf
from io import StringIO

# ─── Caching loaders so they only run once per session ───────────────────────
@st.cache_resource
def load_preprocessor(path="preprocessor.pkl"):
    """Load the fitted preprocessor from ``path`` with joblib.

    Decorated with ``st.cache_resource`` so repeated calls reuse the loaded
    object instead of reading the file again.

    NOTE(review): joblib files are pickle-based; only load artifacts that come
    from a trusted source.
    """
    preprocessor = joblib.load(path)
    return preprocessor

@st.cache_resource
def load_label_encoder(path="label_encoder.pkl"):
    """Load the fitted label encoder from ``path`` with joblib.

    Decorated with ``st.cache_resource`` so repeated calls reuse the loaded
    object instead of reading the file again.

    NOTE(review): joblib files are pickle-based; only load artifacts that come
    from a trusted source.
    """
    label_encoder = joblib.load(path)
    return label_encoder

@st.cache_resource
def load_model(path="keystroke_dnn.h5"):
    """Load the Keras model stored at ``path``.

    Decorated with ``st.cache_resource`` so repeated calls reuse the loaded
    model instead of reading the file again.
    """
    model = tf.keras.models.load_model(path)
    return model

# ─── Prediction helper ───────────────────────────────────────────────────────
def predict_subjects(df_raw):
    """Predict the most likely subject for every row of ``df_raw``.

    The identifier columns ``subject``, ``sessionIndex`` and ``rep`` are
    removed when present. The remaining frame is indexed by the column list
    stored in the preprocessor's first transformer, transformed, and fed to
    the model.

    Returns:
        A DataFrame with a ``"Predicted Subject"`` column followed by one
        ``"Probability <class>"`` column per entry of
        ``label_encoder.categories_[0]``.

    Raises:
        KeyError: if ``df_raw`` lacks one of the expected feature columns.
    """
    scaler = load_preprocessor()
    encoder = load_label_encoder()
    network = load_model()

    # Strip whichever identifier columns the caller left in the frame.
    id_columns = [
        name for name in ("subject", "sessionIndex", "rep")
        if name in df_raw.columns
    ]
    features_only = df_raw.drop(columns=id_columns)

    # Select the features in the order recorded by the first transformer;
    # a missing column surfaces as KeyError here.
    expected_columns = scaler.transformers_[0][2]
    ordered = features_only[expected_columns]

    # Scale, score, and pick the highest-probability class per row.
    probabilities = network.predict(scaler.transform(ordered))
    classes = encoder.categories_[0]
    winners = classes[np.argmax(probabilities, axis=1)]

    # Assemble the output: predicted label first, then per-class probabilities.
    result = {"Predicted Subject": winners}
    for position, cls in enumerate(classes):
        result[f"Probability {cls}"] = probabilities[:, position]

    return pd.DataFrame(result)

# ─── Streamlit App ────────────────────────────────────────────────────────────
def main():
    """Render the Streamlit page and run a prediction for one pasted row.

    The page shows a description of the expected data format and the feature
    names taken from the preprocessor's first transformer, followed by a text
    area for a single tab- or comma-separated row. When the "Predict" button
    is pressed the row is parsed, surplus leading columns (e.g. ``subject``,
    ``sessionIndex``, ``rep``) are discarded, the values are checked to be
    numeric, and the output of ``predict_subjects`` is displayed.

    Input problems are reported to the user through ``st.warning`` /
    ``st.error`` and end the run early; the function returns ``None``.
    """
    st.title("🔐 Keystroke Dynamics Authenticator")
    st.markdown("""
    This app uses **keystroke timing data** to identify who typed a password based on their typing rhythm.

    **How it works:**  
    You paste one row of keystroke data (copied from Excel or CSV), and the system predicts the subject who likely typed it.  
    """)

    with st.expander("ℹ️ About the data format", expanded=False):
        st.markdown("""
        Each row includes 34 values:
        - First 3 columns (optional):  
          `subject` (ID like s002), `sessionIndex` (1-8), and `rep` (1-50)
        - Next 31 columns:  
          Hold times (how long each key was pressed) and latency features:
            - `H.key`: time a key was held
            - `DD.key1.key2`: time between pressing key1 and key2
            - `UD.key1.key2`: time between releasing key1 and pressing key2

        **Example row (tab or comma‑separated):**
        ```
        s002	1	1	0.1491	0.3979	0.2488	...	0.0742
        ```

        You can copy this directly from Excel or a CSV file.
        """)

    # Load features list
    preprocessor = load_preprocessor()
    feature_cols = preprocessor.transformers_[0][2]
    n_features = len(feature_cols)

    st.markdown("### 🧾 Feature Format")
    st.code(", ".join(feature_cols), language="text")

    # Textarea for user input
    input_text = st.text_area(
        "Paste **one row** of keystroke timing data below:",
        placeholder="s002\t1\t1\t0.1491\t0.3979\t0.2488\t... (31 timing values)",
        height=140
    )

    # Nothing more to do until the user asks for a prediction.
    if not st.button("🔍 Predict"):
        return

    if not input_text.strip():
        st.warning("⚠️ Please paste a row of values to proceed.")
        return

    try:
        # Use tab if present, else comma
        delimiter = '\t' if '\t' in input_text else ','

        df_input = pd.read_csv(StringIO(input_text.strip()), header=None, sep=delimiter)

        # Surplus columns are assumed to be leading identifiers, so keep only
        # the trailing n_features columns.
        surplus = df_input.shape[1] - n_features
        if surplus > 0:
            df_input = df_input.iloc[:, surplus:]

        if df_input.shape[1] != n_features:
            st.error(f"❌ Expected {n_features} feature values, but got {df_input.shape[1]}.")
            return

        df_input.columns = feature_cols

    except Exception as e:
        # UI boundary: any parser failure is shown to the user, not raised.
        st.error(f"❌ Could not parse input: {e}")
        return

    if df_input.shape[0] != 1:
        st.error(f"❌ Expected exactly 1 row, but got {df_input.shape[0]}.")
        return

    # Reject text or empty cells up front instead of letting them reach the
    # model as NaN / strings and fail with an opaque prediction error.
    numeric = df_input.apply(pd.to_numeric, errors="coerce")
    invalid = [str(col) for col in numeric.columns[numeric.isna().any().to_numpy()]]
    if invalid:
        st.error(f"❌ Non-numeric or missing values in: {', '.join(invalid)}.")
        return
    df_input = numeric

    st.success("✅ Row parsed successfully!")
    st.write("### 🔍 Parsed Input")
    st.dataframe(df_input, use_container_width=True)

    try:
        df_pred = predict_subjects(df_input)
        st.write("### 🎯 Prediction Result")
        st.dataframe(df_pred, use_container_width=True)
    except KeyError as e:
        st.error(f"Missing feature (typo?): {e}")
    except Exception as e:
        st.error(f"Prediction error: {e}")

# Build the page only when this file is executed as the entry script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()