Update app.py
Browse files
app.py
CHANGED
|
@@ -30,90 +30,107 @@ def predict_subjects(df_raw):
|
|
| 30 |
label_encoder = load_label_encoder()
|
| 31 |
model = load_model()
|
| 32 |
|
| 33 |
-
# Drop
|
| 34 |
for c in ("subject", "sessionIndex", "rep"):
|
| 35 |
if c in df_raw.columns:
|
| 36 |
df_raw = df_raw.drop(columns=[c])
|
| 37 |
|
| 38 |
-
# Re-order
|
| 39 |
feature_cols = preprocessor.transformers_[0][2]
|
| 40 |
df_features = df_raw[feature_cols]
|
| 41 |
|
| 42 |
-
#
|
| 43 |
X_scaled = preprocessor.transform(df_features)
|
| 44 |
y_prob = model.predict(X_scaled)
|
| 45 |
idx_pred = np.argmax(y_prob, axis=1)
|
| 46 |
labels = label_encoder.categories_[0][idx_pred]
|
| 47 |
|
| 48 |
-
# Build
|
| 49 |
-
df_out = pd.DataFrame({"
|
| 50 |
for i, cls in enumerate(label_encoder.categories_[0]):
|
| 51 |
-
df_out[f"
|
| 52 |
|
| 53 |
return df_out
|
| 54 |
|
| 55 |
# ─── Streamlit App ────────────────────────────────────────────────────────────
|
| 56 |
def main():
|
| 57 |
-
st.title("
|
| 58 |
-
st.markdown(
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
)
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
preprocessor = load_preprocessor()
|
| 67 |
feature_cols = preprocessor.transformers_[0][2]
|
| 68 |
|
| 69 |
-
st.
|
| 70 |
st.code(", ".join(feature_cols), language="text")
|
| 71 |
|
| 72 |
-
# Textarea for
|
| 73 |
input_text = st.text_area(
|
| 74 |
-
"Paste
|
| 75 |
-
|
|
|
|
| 76 |
)
|
| 77 |
|
| 78 |
-
if st.button("Predict"):
|
| 79 |
if not input_text.strip():
|
| 80 |
-
st.warning("Please paste
|
| 81 |
return
|
| 82 |
|
| 83 |
try:
|
| 84 |
-
#
|
| 85 |
delimiter = '\t' if '\t' in input_text else ','
|
| 86 |
|
| 87 |
-
df_input = pd.read_csv(
|
| 88 |
-
StringIO(input_text.strip()),
|
| 89 |
-
header=None,
|
| 90 |
-
sep=delimiter
|
| 91 |
-
)
|
| 92 |
|
| 93 |
-
#
|
| 94 |
while df_input.shape[1] > len(feature_cols):
|
| 95 |
df_input = df_input.drop(columns=[df_input.columns[0]])
|
| 96 |
|
| 97 |
if df_input.shape[1] != len(feature_cols):
|
| 98 |
-
st.error(f"Expected {len(feature_cols)}
|
| 99 |
return
|
| 100 |
|
| 101 |
df_input.columns = feature_cols
|
| 102 |
|
| 103 |
except Exception as e:
|
| 104 |
-
st.error(f"Could not parse input: {e}")
|
| 105 |
return
|
| 106 |
|
| 107 |
if df_input.shape[0] != 1:
|
| 108 |
-
st.error(f"Expected exactly 1 row, but got {df_input.shape[0]}.")
|
| 109 |
return
|
| 110 |
|
| 111 |
-
st.
|
|
|
|
| 112 |
st.dataframe(df_input, use_container_width=True)
|
| 113 |
|
| 114 |
try:
|
| 115 |
df_pred = predict_subjects(df_input)
|
| 116 |
-
st.write("### Prediction")
|
| 117 |
st.dataframe(df_pred, use_container_width=True)
|
| 118 |
except KeyError as e:
|
| 119 |
st.error(f"Missing feature (typo?): {e}")
|
|
|
|
| 30 |
label_encoder = load_label_encoder()
|
| 31 |
model = load_model()
|
| 32 |
|
| 33 |
+
# Drop non-feature columns if present
|
| 34 |
for c in ("subject", "sessionIndex", "rep"):
|
| 35 |
if c in df_raw.columns:
|
| 36 |
df_raw = df_raw.drop(columns=[c])
|
| 37 |
|
| 38 |
+
# Re-order and validate features
|
| 39 |
feature_cols = preprocessor.transformers_[0][2]
|
| 40 |
df_features = df_raw[feature_cols]
|
| 41 |
|
| 42 |
+
# Predict
|
| 43 |
X_scaled = preprocessor.transform(df_features)
|
| 44 |
y_prob = model.predict(X_scaled)
|
| 45 |
idx_pred = np.argmax(y_prob, axis=1)
|
| 46 |
labels = label_encoder.categories_[0][idx_pred]
|
| 47 |
|
| 48 |
+
# Build result
|
| 49 |
+
df_out = pd.DataFrame({"Predicted Subject": labels})
|
| 50 |
for i, cls in enumerate(label_encoder.categories_[0]):
|
| 51 |
+
df_out[f"Probability {cls}"] = y_prob[:, i]
|
| 52 |
|
| 53 |
return df_out
|
| 54 |
|
| 55 |
# ─── Streamlit App ────────────────────────────────────────────────────────────
|
| 56 |
def main():
|
| 57 |
+
st.title("π Keystroke Dynamics Authenticator")
|
| 58 |
+
st.markdown("""
|
| 59 |
+
This app uses **keystroke timing data** to identify who typed a password based on their typing rhythm.
|
| 60 |
+
|
| 61 |
+
**How it works:**
|
| 62 |
+
You paste one row of keystroke data (copied from Excel or CSV), and the system predicts the subject who likely typed it.
|
| 63 |
+
""")
|
| 64 |
+
|
| 65 |
+
with st.expander("ℹ️ About the data format", expanded=False):
|
| 66 |
+
st.markdown("""
|
| 67 |
+
Each row includes 34 values:
|
| 68 |
+
- First 3 columns (optional):
|
| 69 |
+
`subject` (ID like s002), `sessionIndex` (1-8), and `rep` (1-50)
|
| 70 |
+
- Next 31 columns:
|
| 71 |
+
Hold times (how long each key was pressed) and latency features:
|
| 72 |
+
- `H.key`: time a key was held
|
| 73 |
+
- `DD.key1.key2`: time between pressing key1 and key2
|
| 74 |
+
- `UD.key1.key2`: time between releasing key1 and pressing key2
|
| 75 |
+
|
| 76 |
+
**Example row (tab or comma-separated):**
|
| 77 |
+
```
|
| 78 |
+
s002 1 1 0.1491 0.3979 0.2488 ... 0.0742
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
You can copy this directly from Excel or a CSV file.
|
| 82 |
+
""")
|
| 83 |
+
|
| 84 |
+
# Load features list
|
| 85 |
preprocessor = load_preprocessor()
|
| 86 |
feature_cols = preprocessor.transformers_[0][2]
|
| 87 |
|
| 88 |
+
st.markdown("### 🧾 Feature Format")
|
| 89 |
st.code(", ".join(feature_cols), language="text")
|
| 90 |
|
| 91 |
+
# Textarea for user input
|
| 92 |
input_text = st.text_area(
|
| 93 |
+
"Paste **one row** of keystroke timing data below:",
|
| 94 |
+
placeholder="s002\t1\t1\t0.1491\t0.3979\t0.2488\t... (31 timing values)",
|
| 95 |
+
height=140
|
| 96 |
)
|
| 97 |
|
| 98 |
+
if st.button("π Predict"):
|
| 99 |
if not input_text.strip():
|
| 100 |
+
st.warning("⚠️ Please paste a row of values to proceed.")
|
| 101 |
return
|
| 102 |
|
| 103 |
try:
|
| 104 |
+
# Use tab if present, else comma
|
| 105 |
delimiter = '\t' if '\t' in input_text else ','
|
| 106 |
|
| 107 |
+
df_input = pd.read_csv(StringIO(input_text.strip()), header=None, sep=delimiter)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
+
# Trim leading columns until the number matches feature_cols
|
| 110 |
while df_input.shape[1] > len(feature_cols):
|
| 111 |
df_input = df_input.drop(columns=[df_input.columns[0]])
|
| 112 |
|
| 113 |
if df_input.shape[1] != len(feature_cols):
|
| 114 |
+
st.error(f"❌ Expected {len(feature_cols)} feature values, but got {df_input.shape[1]}.")
|
| 115 |
return
|
| 116 |
|
| 117 |
df_input.columns = feature_cols
|
| 118 |
|
| 119 |
except Exception as e:
|
| 120 |
+
st.error(f"❌ Could not parse input: {e}")
|
| 121 |
return
|
| 122 |
|
| 123 |
if df_input.shape[0] != 1:
|
| 124 |
+
st.error(f"❌ Expected exactly 1 row, but got {df_input.shape[0]}.")
|
| 125 |
return
|
| 126 |
|
| 127 |
+
st.success("✅ Row parsed successfully!")
|
| 128 |
+
st.write("### π Parsed Input")
|
| 129 |
st.dataframe(df_input, use_container_width=True)
|
| 130 |
|
| 131 |
try:
|
| 132 |
df_pred = predict_subjects(df_input)
|
| 133 |
+
st.write("### 🎯 Prediction Result")
|
| 134 |
st.dataframe(df_pred, use_container_width=True)
|
| 135 |
except KeyError as e:
|
| 136 |
st.error(f"Missing feature (typo?): {e}")
|