MrUtakata committed on
Commit
8769ca6
·
verified ·
1 Parent(s): f00e385

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -33
app.py CHANGED
@@ -30,90 +30,107 @@ def predict_subjects(df_raw):
30
  label_encoder = load_label_encoder()
31
  model = load_model()
32
 
33
- # Drop any stray columns
34
  for c in ("subject", "sessionIndex", "rep"):
35
  if c in df_raw.columns:
36
  df_raw = df_raw.drop(columns=[c])
37
 
38
- # Re-order to exact feature list
39
  feature_cols = preprocessor.transformers_[0][2]
40
  df_features = df_raw[feature_cols]
41
 
42
- # Scale, predict, decode
43
  X_scaled = preprocessor.transform(df_features)
44
  y_prob = model.predict(X_scaled)
45
  idx_pred = np.argmax(y_prob, axis=1)
46
  labels = label_encoder.categories_[0][idx_pred]
47
 
48
- # Build output table
49
- df_out = pd.DataFrame({"predicted_subject": labels})
50
  for i, cls in enumerate(label_encoder.categories_[0]):
51
- df_out[f"prob_{cls}"] = y_prob[:, i]
52
 
53
  return df_out
54
 
55
  # ─── Streamlit App ────────────────────────────────────────────────────────────
56
  def main():
57
- st.title("🔑 Keystroke Dynamics Authentication")
58
- st.markdown(
59
- """
60
- Paste exactly **one** row of feature values (tab or comma‑separated, no header).
61
- The system will ignore `subject`, `sessionIndex`, or `rep` if present.
62
- """
63
- )
64
-
65
- # Load features list for display and parsing
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  preprocessor = load_preprocessor()
67
  feature_cols = preprocessor.transformers_[0][2]
68
 
69
- st.write("**Expected feature order:**")
70
  st.code(", ".join(feature_cols), language="text")
71
 
72
- # Textarea for single-row input
73
  input_text = st.text_area(
74
- "Paste your row here (e.g. from Excel or CSV, including subject if present):",
75
- height=120
 
76
  )
77
 
78
- if st.button("Predict"):
79
  if not input_text.strip():
80
- st.warning("Please paste one row of values.")
81
  return
82
 
83
  try:
84
- # Try tab-delimited first, fallback to comma
85
  delimiter = '\t' if '\t' in input_text else ','
86
 
87
- df_input = pd.read_csv(
88
- StringIO(input_text.strip()),
89
- header=None,
90
- sep=delimiter
91
- )
92
 
93
- # Remove leading columns until number matches feature_cols
94
  while df_input.shape[1] > len(feature_cols):
95
  df_input = df_input.drop(columns=[df_input.columns[0]])
96
 
97
  if df_input.shape[1] != len(feature_cols):
98
- st.error(f"Expected {len(feature_cols)} features, got {df_input.shape[1]}.")
99
  return
100
 
101
  df_input.columns = feature_cols
102
 
103
  except Exception as e:
104
- st.error(f"Could not parse input: {e}")
105
  return
106
 
107
  if df_input.shape[0] != 1:
108
- st.error(f"Expected exactly 1 row, but got {df_input.shape[0]}.")
109
  return
110
 
111
- st.write("### Parsed Input")
 
112
  st.dataframe(df_input, use_container_width=True)
113
 
114
  try:
115
  df_pred = predict_subjects(df_input)
116
- st.write("### Prediction")
117
  st.dataframe(df_pred, use_container_width=True)
118
  except KeyError as e:
119
  st.error(f"Missing feature (typo?): {e}")
 
30
  label_encoder = load_label_encoder()
31
  model = load_model()
32
 
33
+ # Drop non-feature columns if present
34
  for c in ("subject", "sessionIndex", "rep"):
35
  if c in df_raw.columns:
36
  df_raw = df_raw.drop(columns=[c])
37
 
38
+ # Re-order and validate features
39
  feature_cols = preprocessor.transformers_[0][2]
40
  df_features = df_raw[feature_cols]
41
 
42
+ # Predict
43
  X_scaled = preprocessor.transform(df_features)
44
  y_prob = model.predict(X_scaled)
45
  idx_pred = np.argmax(y_prob, axis=1)
46
  labels = label_encoder.categories_[0][idx_pred]
47
 
48
+ # Build result
49
+ df_out = pd.DataFrame({"Predicted Subject": labels})
50
  for i, cls in enumerate(label_encoder.categories_[0]):
51
+ df_out[f"Probability {cls}"] = y_prob[:, i]
52
 
53
  return df_out
54
 
55
  # ─── Streamlit App ────────────────────────────────────────────────────────────
56
  def main():
57
+ st.title("🔐 Keystroke Dynamics Authenticator")
58
+ st.markdown("""
59
+ This app uses **keystroke timing data** to identify who typed a password based on their typing rhythm.
60
+
61
+ **How it works:**
62
+ You paste one row of keystroke data (copied from Excel or CSV), and the system predicts the subject who likely typed it.
63
+ """)
64
+
65
+ with st.expander("ℹ️ About the data format", expanded=False):
66
+ st.markdown("""
67
+ Each row includes 34 values:
68
+ - First 3 columns (optional):
69
+ `subject` (ID like s002), `sessionIndex` (1-8), and `rep` (1-50)
70
+ - Next 31 columns:
71
+ Hold times (how long each key was pressed) and latency features:
72
+ - `H.key`: time a key was held
73
+ - `DD.key1.key2`: time between pressing key1 and key2
74
+ - `UD.key1.key2`: time between releasing key1 and pressing key2
75
+
76
+ **Example row (tab or comma‑separated):**
77
+ ```
78
+ s002 1 1 0.1491 0.3979 0.2488 ... 0.0742
79
+ ```
80
+
81
+ You can copy this directly from Excel or a CSV file.
82
+ """)
83
+
84
+ # Load features list
85
  preprocessor = load_preprocessor()
86
  feature_cols = preprocessor.transformers_[0][2]
87
 
88
+ st.markdown("### 🧾 Feature Format")
89
  st.code(", ".join(feature_cols), language="text")
90
 
91
+ # Textarea for user input
92
  input_text = st.text_area(
93
+ "Paste **one row** of keystroke timing data below:",
94
+ placeholder="s002\t1\t1\t0.1491\t0.3979\t0.2488\t... (31 timing values)",
95
+ height=140
96
  )
97
 
98
+ if st.button("🔍 Predict"):
99
  if not input_text.strip():
100
+ st.warning("⚠️ Please paste a row of values to proceed.")
101
  return
102
 
103
  try:
104
+ # Use tab if present, else comma
105
  delimiter = '\t' if '\t' in input_text else ','
106
 
107
+ df_input = pd.read_csv(StringIO(input_text.strip()), header=None, sep=delimiter)
 
 
 
 
108
 
109
+ # Trim leading columns until the number matches feature_cols
110
  while df_input.shape[1] > len(feature_cols):
111
  df_input = df_input.drop(columns=[df_input.columns[0]])
112
 
113
  if df_input.shape[1] != len(feature_cols):
114
+ st.error(f"❌ Expected {len(feature_cols)} feature values, but got {df_input.shape[1]}.")
115
  return
116
 
117
  df_input.columns = feature_cols
118
 
119
  except Exception as e:
120
+ st.error(f"❌ Could not parse input: {e}")
121
  return
122
 
123
  if df_input.shape[0] != 1:
124
+ st.error(f"❌ Expected exactly 1 row, but got {df_input.shape[0]}.")
125
  return
126
 
127
+ st.success("✅ Row parsed successfully!")
128
+ st.write("### 🔍 Parsed Input")
129
  st.dataframe(df_input, use_container_width=True)
130
 
131
  try:
132
  df_pred = predict_subjects(df_input)
133
+ st.write("### 🎯 Prediction Result")
134
  st.dataframe(df_pred, use_container_width=True)
135
  except KeyError as e:
136
  st.error(f"Missing feature (typo?): {e}")