Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| from wordcloud import WordCloud | |
| import re | |
def label_sentiment(rating):
    """Translate a star rating into a sentiment label.

    Ratings 1-2 map to 'negative', 3 to 'neutral' and 4-5 to 'positive'.
    Anything else yields 'unknown'.
    """
    # Each entry pairs the ratings belonging to a bucket with its label.
    buckets = (
        ((1, 2), 'negative'),
        ((3,), 'neutral'),
        ((4, 5), 'positive'),
    )
    for ratings, label in buckets:
        if rating in ratings:
            return label
    return 'unknown'
def process_review(review):
    """Normalize a review for word-cloud generation.

    The text is lower-cased and every character that is not an ASCII letter
    (a-z) or whitespace is removed.

    Args:
        review: The raw review text. Non-string values (for example ``None``
            or the float NaN that pandas uses for missing cells) are treated
            as an empty review instead of raising ``AttributeError``.

    Returns:
        The cleaned text, or an empty string when ``review`` is not a string.
    """
    if not isinstance(review, str):
        # Missing cells arrive as None/NaN and have no .lower() method.
        return ''
    review = review.lower()
    return re.sub(r'[^a-z\s]', '', review)  # Remove non-alphabetical characters
def display_eda(data):
    """Render exploratory plots for a reviews DataFrame in the Streamlit app.

    Draws a bar chart of sentiment counts, then one word cloud per distinct
    sentiment value built from the 'review_description' column.

    Args:
        data: DataFrame holding either a 'sentiment' column or a 'rating'
            column from which 'sentiment' is derived with ``label_sentiment``.
            The word clouds additionally need a 'review_description' column.

    Returns:
        None. All output is written to the Streamlit page.

    Note:
        When 'sentiment' is absent it is added to ``data`` in place, so the
        caller's frame gains that column.
    """
    # Derive the 'sentiment' column from 'rating' if it doesn't exist
    if 'sentiment' not in data.columns:
        if 'rating' not in data.columns:
            st.error("The dataset does not contain a 'rating' or 'sentiment' column. Please check the data source.")
            return
        data['sentiment'] = data['rating'].apply(label_sentiment)

    # Distribution of sentiments
    st.subheader("Distribution of Sentiments")
    sentiment_counts = data['sentiment'].value_counts()
    if sentiment_counts.empty:
        # Plotting an empty Series raises inside pandas; report it instead.
        st.info("No sentiment data available to display.")
        return
    fig, ax = plt.subplots()
    sentiment_counts.plot(kind='bar', ax=ax)
    ax.set_title('Distribution of Sentiments')
    ax.set_xlabel('Sentiment')
    ax.set_ylabel('Count')
    st.pyplot(fig)
    # Close explicitly: pyplot keeps every figure alive until closed, and
    # Streamlit re-runs this function on each interaction.
    plt.close(fig)

    # Word cloud for each sentiment
    st.subheader("Word Clouds for Sentiments")
    if 'review_description' not in data.columns:
        st.warning("The dataset does not contain a 'review_description' column, so word clouds cannot be generated.")
        return

    # dropna() keeps this loop aligned with value_counts(), which also
    # ignores missing labels.
    for sentiment in data['sentiment'].dropna().unique():
        st.write(f"Word Cloud for {sentiment}")
        reviews = data.loc[data['sentiment'] == sentiment, 'review_description']
        # Skip missing reviews and coerce the rest to str so a stray
        # non-text cell cannot break the join.
        text = " ".join(process_review(str(review)) for review in reviews.dropna())
        if not text.strip():
            st.info(f"No review text available for {sentiment}.")
            continue
        try:
            wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
        except ValueError:
            # WordCloud raises ValueError when no words remain to plot
            # (e.g. everything was filtered out as a stopword).
            st.info(f"Not enough words to build a word cloud for {sentiment}.")
            continue
        fig, ax = plt.subplots()
        ax.imshow(wordcloud, interpolation="bilinear")
        ax.axis("off")
        st.pyplot(fig)
        plt.close(fig)