Spaces:

ravvasanthosh
/

stack1

No application file

App Files Files Community

stack1 / app.py

ravvasanthosh

Rename app .py to app.py

6693345 verified 7 months ago

raw

history blame contribute delete

5.38 kB

	import streamlit as st
	import joblib
	import pandas as pd
	import re
	from unidecode import unidecode
	import emoji
	import string
	import contractions
	from nltk.stem import PorterStemmer
	import numpy as np
	from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS

	# Page-wide stylesheet: app background, header banner, input/output cards,
	# tag pills and footer. Injected as raw HTML, hence unsafe_allow_html.
	_PAGE_STYLES = """
	<style>
	.stApp {
	background-color: #f9fbfc;
	font-family: 'Segoe UI', sans-serif;
	}
	.custom-header {
	background-color: #1e293b;
	color: white;
	padding: 2rem;
	border-radius: 0.5rem;
	text-align: center;
	margin-bottom: 2rem;
	}
	.custom-header h1 {
	font-size: 2rem;
	margin-bottom: 0.5rem;
	}
	.custom-header p {
	font-size: 1rem;
	color: #cbd5e1;
	}
	.input-box, .output-box {
	background-color: white;
	padding: 1.5rem;
	border-radius: 0.5rem;
	box-shadow: 0 0 10px rgba(0, 0, 0, 0.04);
	margin-bottom: 2rem;
	}
	.tag-pill {
	display: inline-block;
	background-color: #e0f2fe;
	color: #0369a1;
	padding: 0.4em 0.8em;
	margin: 0.25em;
	border-radius: 999px;
	font-weight: 600;
	font-size: 0.9rem;
	}
	.footer {
	text-align: center;
	font-size: 0.85rem;
	color: #64748b;
	margin-top: 2rem;
	}
	</style>
	"""
	st.markdown(_PAGE_STYLES, unsafe_allow_html=True)

	# Title banner shown at the top of the page (styled by .custom-header).
	_HEADER_HTML = """
	<div class="custom-header">
	<h1>📌 StackOverflow Tag Predictor</h1>
	<p>Enter a programming question to see predicted tags</p>
	</div>
	"""
	st.markdown(_HEADER_HTML, unsafe_allow_html=True)

	# Shared text-cleaning resources, built once at import time.
	stemmer = PorterStemmer()
	# Mutable copy of scikit-learn's English stop-word list.
	stop_words = {*ENGLISH_STOP_WORDS}
	# Chat abbreviation -> expansion, looked up per whitespace-separated word.
	chat_words = dict(
	    brb="be right back",
	    btw="by the way",
	    lol="laugh out loud",
	    afaik="as far as i know",
	    imo="in my opinion",
	    tbh="to be honest",
	    idk="i don't know",
	    asap="as soon as possible",
	    np="no problem",
	    thx="thanks",
	    pls="please",
	    fyi="for your information",
	)

	def preprocess_text(text):
	    """Normalise a raw question into a space-separated string of stemmed tokens.

	    Steps, in order: strip HTML tags, strip URLs, spell out emoji, transliterate
	    to ASCII, expand contractions, lowercase, expand chat abbreviations, drop
	    punctuation, tokenise, remove stop words, stem.

	    Args:
	        text: The raw question text.

	    Returns:
	        The cleaned text, or "" when ``text`` is not a string, is blank, or a
	        processing step raises (the error is shown with ``st.error``).
	    """
	    if not isinstance(text, str) or not text.strip():
	        return ""
	    try:
	        text = re.sub(r'<[^>]+>', '', text)
	        # The alternation bar must be unescaped: the previous pattern used a
	        # backslash-escaped bar, which only matched a literal "|" between an
	        # http URL and "www.", so ordinary links were never removed.
	        # NOTE(review): if the model was trained on text cleaned with the old
	        # pattern, confirm this does not introduce train/serve skew.
	        text = re.sub(r'https?://\S+|www\.\S+', '', text)
	        text = emoji.demojize(text, delimiters=(" ", " "))
	        text = unidecode(text)
	        text = contractions.fix(text)
	        text = text.lower()
	        # Text is already lowercase here, so words are looked up as-is.
	        # Abbreviations with attached punctuation (e.g. "btw,") are not
	        # expanded because punctuation is removed only in the next step.
	        text = " ".join(chat_words.get(word, word) for word in text.split())
	        text = text.translate(str.maketrans('', '', string.punctuation))
	        tokens = re.findall(r'\b\w+\b', text)
	        tokens = [stemmer.stem(word) for word in tokens if word not in stop_words]
	        return " ".join(tokens)
	    except Exception as e:
	        # UI boundary: report the failure instead of crashing the app.
	        st.error(f"Preprocessing error: {e}")
	        return ""

	@st.cache_resource
	def load_models():
	    """Load the tag classifier and the label binarizer from disk.

	    Returns:
	        ``(model, mlb)`` on success. If either file fails to load, the error
	        is shown with ``st.error`` and ``(None, None)`` is returned.
	    """
	    artifact_paths = ("tag_model.joblib", "tag_binarizer.joblib")
	    try:
	        # Loaded lazily in order, so a failure on the first file skips the second.
	        classifier, binarizer = (joblib.load(path) for path in artifact_paths)
	    except Exception as exc:
	        st.error(f"Error loading model: {exc}")
	        return None, None
	    return classifier, binarizer

	# Either both objects are set, or both are None when loading failed.
	model, mlb = load_models()

	# Question input area.
	# NOTE(review): the opening and closing <div> are sent in separate
	# st.markdown calls; presumably Streamlit renders each call as its own
	# element, so the text area may not actually sit inside .input-box - confirm.
	st.markdown('<div class="input-box">', unsafe_allow_html=True)
	user_input = st.text_area(
	    label="✍️ Paste your programming question below:",
	    height=200,
	    placeholder="e.g., How to reverse a list in Python?",
	)
	st.markdown('</div>', unsafe_allow_html=True)

	# Prediction
	TOP_K = 5  # how many of the highest-scoring tags to show


	def _rank_tags(model, mlb, processed, top_k=TOP_K):
	    """Return ``(tags, confidences)`` for one preprocessed question.

	    Uses ``predict_proba`` when the model has it (confidences are integer
	    percentages), else ``decision_function`` (confidences are None), else
	    plain ``predict`` decoded through ``mlb.inverse_transform``.
	    """
	    # The model is fed a one-row frame with a 'processed_excerpt' column.
	    input_df = pd.DataFrame({'processed_excerpt': [processed]})
	    if hasattr(model, "predict_proba"):
	        # assumes row 0 holds one score per entry of mlb.classes_ - TODO confirm
	        probs = model.predict_proba(input_df)[0]
	        top_idx = np.argsort(probs)[-top_k:][::-1]
	        tags = [mlb.classes_[i] for i in top_idx]
	        return tags, [int(probs[i] * 100) for i in top_idx]
	    if hasattr(model, "decision_function"):
	        scores = model.decision_function(input_df)[0]
	        top_idx = np.argsort(scores)[-top_k:][::-1]
	        tags = [mlb.classes_[i] for i in top_idx]
	        # Raw scores are not percentages, so no confidence is shown.
	        return tags, [None] * len(tags)
	    preds = model.predict(input_df)
	    tags = list(mlb.inverse_transform(preds)[0])
	    return tags, [None] * len(tags)


	def _render_tags(tags, confs):
	    """Show the predicted tags as pills inside a single output box."""
	    pills = []
	    for tag, conf in zip(tags, confs):
	        confidence = f" ({conf}%)" if conf is not None else ""
	        pills.append(f'<span class="tag-pill">{tag}{confidence}</span>')
	    # Emit the box and all pills as ONE HTML fragment: a <div> opened in one
	    # st.markdown call and closed in another does not wrap what is rendered
	    # in between.
	    st.markdown(
	        '<div class="output-box"><h4>🏷️ Predicted Tags:</h4>'
	        + "".join(pills)
	        + '</div>',
	        unsafe_allow_html=True,
	    )


	if st.button("🚀 Predict Tags"):
	    if not user_input.strip():
	        st.warning("Please enter your question to get predictions.")
	    elif model is None or mlb is None:
	        st.error("Model loading failed.")
	    else:
	        with st.spinner("Processing..."):
	            processed = preprocess_text(user_input)
	            if not processed:
	                # Previously this case showed nothing at all.
	                st.warning("Nothing left to analyse after cleaning the question. Please add more detail.")
	            else:
	                try:
	                    tags, confs = _rank_tags(model, mlb, processed)
	                    if tags:
	                        _render_tags(tags, confs)
	                    else:
	                        st.info("No tags were predicted for this question.")
	                except Exception as e:
	                    # UI boundary: surface model errors instead of crashing.
	                    st.error(f"Prediction error: {e}")

	# Closing caption at the bottom of the page (styled by .footer).
	_FOOTER_HTML = """
	<div class="footer">
	🔎 This ML tool predicts tags based on programming question content.
	</div>
	"""
	st.markdown(_FOOTER_HTML, unsafe_allow_html=True)