Spaces:
No application file
No application file
| import streamlit as st | |
| import joblib | |
| import pandas as pd | |
| import re | |
| from unidecode import unidecode | |
| import emoji | |
| import string | |
| import contractions | |
| from nltk.stem import PorterStemmer | |
| import numpy as np | |
| from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS | |
# Custom CSS Styling
# Page-wide stylesheet injected once at the top of the script. It defines the
# classes referenced by the HTML fragments emitted further down:
# custom-header, input-box, output-box, tag-pill and footer.
_PAGE_STYLES = """
<style>
.stApp {
background-color: #f9fbfc;
font-family: 'Segoe UI', sans-serif;
}
.custom-header {
background-color: #1e293b;
color: white;
padding: 2rem;
border-radius: 0.5rem;
text-align: center;
margin-bottom: 2rem;
}
.custom-header h1 {
font-size: 2rem;
margin-bottom: 0.5rem;
}
.custom-header p {
font-size: 1rem;
color: #cbd5e1;
}
.input-box, .output-box {
background-color: white;
padding: 1.5rem;
border-radius: 0.5rem;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.04);
margin-bottom: 2rem;
}
.tag-pill {
display: inline-block;
background-color: #e0f2fe;
color: #0369a1;
padding: 0.4em 0.8em;
margin: 0.25em;
border-radius: 999px;
font-weight: 600;
font-size: 0.9rem;
}
.footer {
text-align: center;
font-size: 0.85rem;
color: #64748b;
margin-top: 2rem;
}
</style>
"""
# unsafe_allow_html is required, otherwise the <style> element is escaped
# and shown as text instead of being applied.
st.markdown(_PAGE_STYLES, unsafe_allow_html=True)
# Header
# Title banner, styled by the "custom-header" CSS class.
# NOTE(review): the leading "π" in the title looks like a mis-decoded emoji;
# confirm the intended glyph against the original file.
_HEADER_HTML = """
<div class="custom-header">
<h1>π StackOverflow Tag Predictor</h1>
<p>Enter a programming question to see predicted tags</p>
</div>
"""
st.markdown(_HEADER_HTML, unsafe_allow_html=True)
# Initialize components
# Module-level helpers read by preprocess_text().
stemmer = PorterStemmer()
stop_words = set(ENGLISH_STOP_WORDS)

# Chat abbreviations and the long form each one is expanded to.
chat_words = {
    "brb": "be right back",
    "btw": "by the way",
    "lol": "laugh out loud",
    "afaik": "as far as i know",
    "imo": "in my opinion",
    "tbh": "to be honest",
    "idk": "i don't know",
    "asap": "as soon as possible",
    "np": "no problem",
    "thx": "thanks",
    "pls": "please",
    "fyi": "for your information",
}
# Translation table that deletes every ASCII punctuation character.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def preprocess_text(text):
    """Normalize a raw question into a space-separated string of stemmed tokens.

    Steps, in order: strip HTML tags, strip URLs, replace emoji with their
    textual names, transliterate to ASCII, expand contractions, lowercase,
    expand chat abbreviations, delete punctuation, tokenize on word
    characters, drop stop words, and stem each remaining token.

    NOTE(review): the step order presumably mirrors the preprocessing used
    when the model was trained; confirm before changing any step.

    Args:
        text: The raw question text.

    Returns:
        The cleaned text, or "" when *text* is not a string, is blank, or
        any step raises (the error is reported through ``st.error``).
    """
    if not isinstance(text, str):
        return ""
    if not text.strip():
        return ""

    try:
        # Markup and links first, so their contents never become tokens.
        cleaned = re.sub(r'<[^>]+>', '', text)
        cleaned = re.sub(r'https?://\S+|www\.\S+', '', cleaned)

        # Emoji names are delimited by spaces so they split off as words.
        cleaned = emoji.demojize(cleaned, delimiters=(" ", " "))
        cleaned = unidecode(cleaned)
        cleaned = contractions.fix(cleaned).lower()

        # Abbreviations are matched on whole whitespace-separated words only.
        cleaned = " ".join(
            chat_words.get(piece, piece) for piece in cleaned.split()
        )
        cleaned = cleaned.translate(_PUNCTUATION_TABLE)

        stems = [
            stemmer.stem(token)
            for token in re.findall(r'\b\w+\b', cleaned)
            if token not in stop_words
        ]
        return " ".join(stems)
    except Exception as e:
        st.error(f"Preprocessing error: {e}")
        return ""
def _read_artifacts():
    """Deserialize the tag classifier and the label binarizer from disk.

    Raises whatever ``joblib.load`` raises (for example when a file is
    missing), so that a failed attempt is never stored by the cache below.
    """
    # NOTE: joblib.load unpickles arbitrary Python objects; these files must
    # only ever come from a trusted source.
    return joblib.load("tag_model.joblib"), joblib.load("tag_binarizer.joblib")


# Streamlit re-executes the whole script on every widget interaction. Caching
# the loaded objects means the artifacts are deserialized once per process
# instead of on every rerun. Releases without st.cache_resource fall back to
# the previous uncached behaviour.
if hasattr(st, "cache_resource"):
    _read_artifacts = st.cache_resource(_read_artifacts)


def load_models():
    """Return ``(model, mlb)`` loaded from the joblib files next to the app.

    Returns:
        A tuple of the classifier and the label binarizer. When loading
        fails, the error is shown with ``st.error`` and ``(None, None)`` is
        returned so the caller can degrade gracefully.
    """
    try:
        return _read_artifacts()
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return None, None
# Classifier and label binarizer; both are None when loading failed.
model, mlb = load_models()

# Input
_QUESTION_LABEL = "βοΈ Paste your programming question below:"
_QUESTION_PLACEHOLDER = "e.g., How to reverse a list in Python?"

# NOTE(review): the opening and closing <div> are sent as two separate
# markdown elements, so they may not actually wrap the text area; confirm
# the rendered layout.
st.markdown('<div class="input-box">', unsafe_allow_html=True)
user_input = st.text_area(
    _QUESTION_LABEL,
    height=200,
    placeholder=_QUESTION_PLACEHOLDER,
)
st.markdown('</div>', unsafe_allow_html=True)
# Prediction
def _rank_tags(model, mlb, features, top_k=5):
    """Return ``(tags, confidences)`` for the single row in *features*.

    The richest scoring API the model offers is used:

    * ``predict_proba``: the *top_k* highest-probability tags, with the
      probability as a whole-number percentage.
    * ``decision_function``: the *top_k* highest-scoring tags, with no
      percentage.
    * otherwise: the labels decoded from ``predict``, with no percentage.

    NOTE(review): assumes the model emits one score per entry of
    ``mlb.classes_``, in the same order; confirm against the training code.

    Args:
        model: Fitted estimator or pipeline.
        mlb: Fitted label binarizer exposing ``classes_`` and
            ``inverse_transform``.
        features: Single-row input accepted by *model*.
        top_k: Maximum number of ranked tags to return.

    Returns:
        Two equal-length lists: tag names, and confidences (``int``
        percentages or ``None`` when the model gives no probability).
    """
    if hasattr(model, "predict_proba"):
        probs = model.predict_proba(features)[0]
        top_idx = np.argsort(probs)[-top_k:][::-1]
        tags = [mlb.classes_[i] for i in top_idx]
        # Round instead of truncating: 0.29 * 100 is 28.999... in floating
        # point, and int() alone would display it as 28%.
        confs = [int(round(float(probs[i]) * 100)) for i in top_idx]
    elif hasattr(model, "decision_function"):
        scores = model.decision_function(features)[0]
        top_idx = np.argsort(scores)[-top_k:][::-1]
        tags = [mlb.classes_[i] for i in top_idx]
        confs = [None] * len(tags)
    else:
        preds = model.predict(features)
        tags = list(mlb.inverse_transform(preds)[0])
        confs = [None] * len(tags)
    return tags, confs


if st.button("π Predict Tags"):
    if not user_input.strip():
        st.warning("Please enter your question to get predictions.")
    elif model is None or mlb is None:
        st.error("Model loading failed.")
    else:
        with st.spinner("Processing..."):
            processed = preprocess_text(user_input)
            if not processed:
                # Previously this case produced no output at all, leaving
                # the user without any feedback after clicking the button.
                st.warning(
                    "No usable text was left after cleaning your question. "
                    "Please add more detail and try again."
                )
            else:
                try:
                    input_df = pd.DataFrame({'processed_excerpt': [processed]})
                    tags, confs = _rank_tags(model, mlb, input_df)

                    # Output
                    if not tags:
                        st.info("No tags were predicted for this question.")
                    else:
                        pills = "".join(
                            '<span class="tag-pill">{}{}</span>'.format(
                                tag, f" ({conf}%)" if conf is not None else ""
                            )
                            for tag, conf in zip(tags, confs)
                        )
                        # The box, heading and pills are emitted as ONE HTML
                        # fragment: separate st.markdown calls are separate
                        # page elements, so a <div> opened in one call cannot
                        # wrap content written by another.
                        st.markdown(
                            '<div class="output-box"><h4>π·οΈ Predicted Tags:</h4>'
                            f'{pills}</div>',
                            unsafe_allow_html=True,
                        )
                except Exception as e:
                    st.error(f"Prediction error: {e}")
# Footer
# Closing caption, styled by the "footer" CSS class.
# NOTE(review): the leading "π" looks like a mis-decoded emoji; confirm the
# intended glyph against the original file.
_FOOTER_HTML = """
<div class="footer">
π This ML tool predicts tags based on programming question content.
</div>
"""
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)