Spaces:

MrUtakata
/

ddgnn

Build error

App Files Community

MrUtakata committed on Apr 19

Commit

ac77892

verified ·

1 Parent(s): 23ad8ee

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -53

app.py CHANGED Viewed

@@ -1,66 +1,43 @@
-# app.py
 import streamlit as st
-import joblib
-import nltk
 import torch
-import torch.nn.functional as F
 import numpy as np
-from nltk.corpus import stopwords
-from nltk.tokenize import RegexpTokenizer
-from sklearn.feature_extraction.text import TfidfVectorizer
-# ——— 1) NLTK setup ———
-nltk.download('stopwords')
-_STOP_WORDS = set(stopwords.words('english'))
-_TOKENIZER = RegexpTokenizer(r'\w+')
-def preprocess_text(text: str) -> str:
-    tokens = _TOKENIZER.tokenize(text.lower())
-    return " ".join([t for t in tokens if t not in _STOP_WORDS])
-# ——— 2) Load heavy resources once ———
 @st.cache_resource
-def load_resources():
-    tfidf: TfidfVectorizer = joblib.load("tfidf_vectorizer.pkl")
-    sage_model: torch.nn.Module = joblib.load("sage_model.pkl")
-    sage_model.eval()
-    return tfidf, sage_model
-tfidf, sage_model = load_resources()
-# ——— 3) Streamlit UI ———
-st.title("Disinformation Detection")
-st.write(
-    """
-    Paste in some text and click **Predict**.
-    The model will output the probability it’s **True Information** vs. **Disinformation**.
-    """
-)
-user_input = st.text_area("Your text here", height=200)
-if st.button("Predict"):
-    if not user_input.strip():
-        st.warning("Please enter some text first.")
     else:
-        # Preprocess & vectorize
-        cleaned = preprocess_text(user_input)
-        vec = tfidf.transform([cleaned]).toarray()
-        x = torch.from_numpy(vec).float()   # shape [1, D]
-        # Empty graph so GraphSAGE layers still run
-        edge_index = torch.empty((2, 0), dtype=torch.long)
-        # Inference
         with torch.no_grad():
-            logits = sage_model(x, edge_index)    # [1, 2]
-            probs = torch.exp(logits).numpy()[0]  # convert log‑softmax → probabilities
-        # Display
-        st.markdown("### Prediction probabilities")
-        st.write(f"• 🔵 True information:  {probs[1]:.2%}")
-        st.write(f"• 🔴 Disinformation:    {probs[0]:.2%}")
-        verdict = "✅ Likely TRUE" if probs[1] > probs[0] else "❌ Likely DISINFORMATION"
-        st.markdown(f"## **{verdict}**")

 import streamlit as st
 import torch
+import joblib
+import dill
 import numpy as np
+# Load assets
 @st.cache_resource
+def load_assets():
+    with open("preprocess_function.pkl", "rb") as f:
+        preprocess_text = dill.load(f)
+    tfidf = joblib.load("tfidf_vectorizer.pkl")
+    model = joblib.load("sage_model.pkl")
+    return preprocess_text, tfidf, model
+preprocess_text, tfidf_vectorizer, sage_model = load_assets()
+# App title
+st.title("🧠 Disinformation Detection")
+st.write("This app predicts whether a given news article is **real** or **disinformation** using a trained GraphSAGE model.")
+# Input text
+user_input = st.text_area("📝 Enter a news article or headline:")
+if st.button("Detect"):
+    if user_input.strip() == "":
+        st.warning("Please enter some text to analyze.")
     else:
+        # Preprocess input
+        cleaned_text = preprocess_text(user_input)
+        tfidf_vector = tfidf_vectorizer.transform([cleaned_text])
+        features = torch.tensor(tfidf_vector.toarray(), dtype=torch.float)
+        # Predict
+        sage_model.eval()
         with torch.no_grad():
+            logits = sage_model(features, torch.empty((2, 0), dtype=torch.long))  # dummy edge_index
+            prediction = torch.argmax(logits, dim=1).item()
+            prob = torch.exp(logits)[0, prediction].item()
+        label = "🟢 Real News" if prediction == 1 else "🔴 Disinformation"
+        st.markdown(f"### Prediction: {label}")
+        st.markdown(f"**Confidence:** {prob:.2%}")