# Streamlit app: collects customer attributes, applies the saved one-hot
# encoder and scaler, and shows the churn prediction of a saved ensemble model.
import os
import pickle

import gdown
import joblib
import numpy as np
import pandas as pd
import streamlit as st

# ---------------------- Download and Load Ensemble Model ----------------------
ensemble_model_path = "ensemble_model.pkl"
url = "https://drive.google.com/uc?export=download&id=1jHtHOzfhtWMyYqX_pbQJ5akYAe6ZhfhU"


@st.cache_resource
def _load_artifacts(model_path, model_url):
    """Return ``(ensemble, encoder, scaler)``, downloading the model if absent.

    Streamlit re-executes the whole script on every widget interaction, so the
    load is wrapped in ``st.cache_resource`` to read the files from disk only
    once per server process instead of on every rerun.

    Raises:
        FileNotFoundError: if the model file is still missing after the
            download attempt, or a preprocessing file does not exist.
    """
    if not os.path.exists(model_path):
        gdown.download(model_url, model_path, quiet=False)
    # The download result is not trusted on its own; confirm that the file
    # really exists before trying to load it.
    if not os.path.exists(model_path):
        raise FileNotFoundError(
            f"'{model_path}' is missing and could not be downloaded"
        )

    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
    # code. The model is fetched from a remote URL, so this is only safe as
    # long as that remote file is trusted and under the project's control.
    model = joblib.load(model_path)

    # ---------------------- Load Preprocessing Objects ----------------------
    onehot = joblib.load("onehotencoder.pkl")
    std_scaler = joblib.load("scaler.pkl")
    return model, onehot, std_scaler


try:
    ensemble, encoder, scaler = _load_artifacts(ensemble_model_path, url)
except FileNotFoundError as exc:
    # Show a readable message instead of a raw traceback and halt this run.
    st.error(f"Could not load a required model file: {exc}")
    st.stop()

# ---------------------- Set up the Streamlit App ----------------------
st.title("Customer Churn Predictor")
st.write(
    " This app uses a trained machine learning model to predict whether a "
    "customer is likely to churn. Please enter the customer details below. "
)

# ---------------------- User Inputs ----------------------
st.header("Customer Details")

age = st.number_input("Age", min_value=18, max_value=100, value=30)
tenure = st.number_input("Tenure (in months)", min_value=0, max_value=120, value=12)
usage_frequency = st.number_input("Usage Frequency", min_value=0, max_value=100, value=5)
support_calls = st.number_input("Support Calls", min_value=0, max_value=50, value=2)
payment_delay = st.number_input("Payment Delay (days)", min_value=0, max_value=365, value=5)
total_spend = st.number_input(
    "Total Spend", min_value=0.0, max_value=10000.0, value=100.0, step=10.0
)
last_interaction = st.number_input(
    "Last Interaction (days ago)", min_value=0, max_value=365, value=15
)

# Match categories used during training
gender = st.selectbox("Gender", options=["Male", "Female"])
subscription_type = st.selectbox("Subscription Type", options=["Premium", "Standard"])
contract_length = st.selectbox("Contract Length", options=["Monthly", "Quarterly"])

# Create a single-row DataFrame for the input
input_df = pd.DataFrame(
    {
        "Age": [age],
        "Tenure": [tenure],
        "Usage Frequency": [usage_frequency],
        "Support Calls": [support_calls],
        "Payment Delay": [payment_delay],
        "Total Spend": [total_spend],
        "Last Interaction": [last_interaction],
        "Gender": [gender],
        "Subscription Type": [subscription_type],
        "Contract Length": [contract_length],
    }
)

st.write("### Input Data")
st.write(input_df)

# ---------------------- Preprocessing ----------------------
categorical_cols = ["Gender", "Subscription Type", "Contract Length"]

encoded_cat = encoder.transform(input_df[categorical_cols])
# Depending on how the encoder was fitted, transform() may return a SciPy
# sparse matrix, which pd.DataFrame cannot lay out under the given column
# names; densify it first.
if hasattr(encoded_cat, "toarray"):
    encoded_cat = encoded_cat.toarray()
encoded_cat_df = pd.DataFrame(
    encoded_cat, columns=encoder.get_feature_names_out(categorical_cols)
)

# Numeric columns first, then the one-hot columns, side by side.
input_df = input_df.drop(categorical_cols, axis=1)
input_transformed = pd.concat(
    [input_df.reset_index(drop=True), encoded_cat_df.reset_index(drop=True)],
    axis=1,
)
input_scaled = scaler.transform(input_transformed)

# ---------------------- Run Prediction ----------------------
if st.button("Predict Churn"):
    prediction = ensemble.predict(input_scaled)
    # NOTE(review): column 1 is taken as the "churn" class probability; this
    # presumes the model's classes are ordered [0, 1] -- confirm against
    # ensemble.classes_.
    prediction_proba = ensemble.predict_proba(input_scaled)[:, 1]
    result = "Churned" if prediction[0] == 1 else "Not Churned"

    st.write("### Prediction Results")
    st.write(f"**Prediction:** {result}")
    st.write(f"**Churn Probability:** {prediction_proba[0]:.2f}")