Spaces:
Runtime error
Runtime error
Update eda.py
Browse files
eda.py
CHANGED
|
@@ -2,6 +2,7 @@ import streamlit as st
|
|
| 2 |
import pandas as pd
|
| 3 |
import matplotlib.pyplot as plt
|
| 4 |
from wordcloud import WordCloud
|
|
|
|
| 5 |
|
| 6 |
def label_sentiment(rating):
|
| 7 |
"""Label sentiment based on the rating."""
|
|
@@ -14,6 +15,12 @@ def label_sentiment(rating):
|
|
| 14 |
else:
|
| 15 |
return 'unknown'
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
def display_eda(data):
|
| 18 |
# Derive the 'sentiment' column from 'rating' if it doesn't exist
|
| 19 |
if 'sentiment' not in data.columns:
|
|
@@ -39,7 +46,7 @@ def display_eda(data):
|
|
| 39 |
for sentiment in sentiments:
|
| 40 |
st.write(f"Word Cloud for {sentiment}")
|
| 41 |
subset = data[data['sentiment'] == sentiment]
|
| 42 |
-
text = " ".join(review for review in subset['
|
| 43 |
wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
|
| 44 |
plt.figure()
|
| 45 |
plt.imshow(wordcloud, interpolation="bilinear")
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import matplotlib.pyplot as plt
|
| 4 |
from wordcloud import WordCloud
|
| 5 |
+
import re
|
| 6 |
|
| 7 |
def label_sentiment(rating):
|
| 8 |
"""Label sentiment based on the rating."""
|
|
|
|
| 15 |
else:
|
| 16 |
return 'unknown'
|
| 17 |
|
| 18 |
+
def process_review(review):
    """Normalize a single review for word-cloud generation.

    The text is lower-cased and every character that is not an ASCII
    letter ``a``-``z`` or whitespace is removed.

    Args:
        review: The raw review text. Non-string values (for example
            ``None`` or a float NaN standing in for a missing cell) are
            treated as an empty review.

    Returns:
        The cleaned text, or ``""`` when ``review`` is not a string.
    """
    # Calling .lower() on a non-string raises AttributeError, and str(nan)
    # would inject the bogus word "nan" into the joined text, so non-text
    # values contribute nothing instead.
    if not isinstance(review, str):
        return ""
    lowered = review.lower()
    # Keep only lowercase ASCII letters and whitespace.
    return re.sub(r'[^a-z\s]', '', lowered)
|
| 23 |
+
|
| 24 |
def display_eda(data):
|
| 25 |
# Derive the 'sentiment' column from 'rating' if it doesn't exist
|
| 26 |
if 'sentiment' not in data.columns:
|
|
|
|
| 46 |
for sentiment in sentiments:
|
| 47 |
st.write(f"Word Cloud for {sentiment}")
|
| 48 |
subset = data[data['sentiment'] == sentiment]
|
| 49 |
+
text = " ".join(process_review(review) for review in subset['review'])
|
| 50 |
wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
|
| 51 |
plt.figure()
|
| 52 |
plt.imshow(wordcloud, interpolation="bilinear")
|