File size: 3,568 Bytes
c625993
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import streamlit as st
import pandas as pd
import numpy as np
import os
import gdown
import pickle
import joblib

# ---------------------- Download and Load Ensemble Model ----------------------
# Path where the downloaded model is cached on disk.
ensemble_model_path = "ensemble_model.pkl"


@st.cache_resource
def _load_ensemble(path):
    """Download the pickled ensemble (if not already on disk) and load it.

    Decorated with st.cache_resource so the download/unpickle happens once
    per server process instead of on every Streamlit rerun (Streamlit
    re-executes this whole script on each widget interaction).

    SECURITY NOTE: pickle.load executes arbitrary code embedded in the
    file; only load model files from a trusted, pinned source.
    """
    if not os.path.exists(path):
        url = "https://drive.google.com/uc?export=download&id=1jHtHOzfhtWMyYqX_pbQJ5akYAe6ZhfhU"
        gdown.download(url, path, quiet=False)
    with open(path, "rb") as f:
        return pickle.load(f)


@st.cache_resource
def _load_preprocessors():
    """Load the fitted OneHotEncoder and scaler (cached across reruns).

    Ensure that onehotencoder.pkl and scaler.pkl are in the same directory.
    """
    return joblib.load("onehotencoder.pkl"), joblib.load("scaler.pkl")


# Load the ensemble model using pickle.
ensemble = _load_ensemble(ensemble_model_path)

# ---------------------- Load Preprocessing Objects ----------------------
encoder, scaler = _load_preprocessors()

# ---------------------- Set up the Streamlit App ----------------------
st.title("Customer Churn Predictor")
st.write("""
This app uses a trained machine learning model to predict whether a customer is likely to churn.
Please enter the customer details below.
""")

# ---------------------- User Inputs ----------------------
st.header("Customer Details")

# Collect every field via its Streamlit widget; the dict's insertion order
# fixes both the on-screen widget order and the DataFrame column order
# that the preprocessing steps below expect.
customer_fields = {
    "Age": st.number_input("Age", min_value=18, max_value=100, value=30),
    "Tenure": st.number_input("Tenure (in months)", min_value=0, max_value=120, value=12),
    "Usage Frequency": st.number_input("Usage Frequency", min_value=0, max_value=100, value=5),
    "Support Calls": st.number_input("Support Calls", min_value=0, max_value=50, value=2),
    "Total Spend": st.number_input("Total Spend", min_value=0.0, max_value=10000.0, value=100.0, step=10.0),
    # Categorical fields (modify options as needed for your training data).
    "Gender": st.selectbox("Gender", options=["Male", "Female"]),
    "Subscription Type": st.selectbox("Subscription Type", options=["Type A", "Type B", "Type C"]),
    "Contract Length": st.selectbox("Contract Length", options=["Monthly", "Quarterly", "Yearly"]),
}

# Single-row DataFrame holding this customer's details.
input_df = pd.DataFrame({column: [value] for column, value in customer_fields.items()})

st.write("### Input Data")
st.write(input_df)

# ---------------------- Preprocessing ----------------------
# 1. Encode categorical features using the loaded OneHotEncoder.
categorical_cols = ["Gender", "Subscription Type", "Contract Length"]
encoded_cat = encoder.transform(input_df[categorical_cols])
# sklearn's OneHotEncoder returns a SciPy sparse matrix unless it was
# fitted with sparse_output=False; pd.DataFrame cannot take a sparse
# matrix directly, so densify defensively either way.
if hasattr(encoded_cat, "toarray"):
    encoded_cat = encoded_cat.toarray()
encoded_cat_df = pd.DataFrame(encoded_cat, columns=encoder.get_feature_names_out(categorical_cols))

# 2. Drop original categorical columns and concatenate encoded features.
input_df = input_df.drop(categorical_cols, axis=1)
input_transformed = pd.concat([input_df.reset_index(drop=True), encoded_cat_df.reset_index(drop=True)], axis=1)

# 3. Standardize features using the loaded StandardScaler.
# NOTE(review): the scaler is applied to the one-hot columns as well —
# this is only correct if it was fitted on this same concatenated layout;
# verify against the training pipeline.
input_scaled = scaler.transform(input_transformed)

# ---------------------- Run Prediction ----------------------
if st.button("Predict Churn"):
    churn_label = ensemble.predict(input_scaled)[0]
    # Probability of the positive (churn) class for this single row.
    churn_probability = ensemble.predict_proba(input_scaled)[0, 1]

    # Interpret the label: assuming 1 = churn, 0 = not churn.
    verdict = "Churned" if churn_label == 1 else "Not Churned"
    st.write("### Prediction Results")
    st.write(f"**Prediction:** {verdict}")
    st.write(f"**Churn Probability:** {churn_probability:.2f}")