Spaces:

Kurkur99
/

Sentiment_analysis

Runtime error

App Files Files Community

Sentiment_analysis / eda.py

Kurkur99

Update eda.py

298c4f8 about 2 years ago

raw

history blame contribute delete

1.89 kB

	import streamlit as st
	import pandas as pd
	import matplotlib.pyplot as plt
	from wordcloud import WordCloud
	import re

	def label_sentiment(rating):
	    """Map a star rating to a coarse sentiment label.

	    Ratings 1 and 2 give 'negative', 3 gives 'neutral', 4 and 5 give
	    'positive'; any other value gives 'unknown'.
	    """
	    # Ordered (accepted ratings, label) pairs; the first match wins.
	    buckets = (
	        ((1, 2), 'negative'),
	        ((3,), 'neutral'),
	        ((4, 5), 'positive'),
	    )
	    for scores, label in buckets:
	        if rating in scores:
	            return label
	    return 'unknown'

	def process_review(review):
	    """Normalize review text for word-cloud generation.

	    Lowercases the text and removes every character that is not an ASCII
	    lowercase letter or whitespace.

	    Args:
	        review: The raw review text. Non-string values (for example ``None``
	            or the float NaN that marks a missing cell) are treated as empty.

	    Returns:
	        The cleaned text, or an empty string for non-string input.
	    """
	    # Missing values have no .lower(); treat them as empty instead of crashing.
	    if not isinstance(review, str):
	        return ''
	    lowered = review.lower()
	    return re.sub(r'[^a-z\s]', '', lowered)  # Remove non-alphabetical characters

	def display_eda(data):
	    """Render exploratory charts for a review dataset in Streamlit.

	    Draws a bar chart of sentiment counts, then one word cloud per distinct
	    sentiment value built from the 'review_description' column.

	    Args:
	        data: DataFrame with either a 'sentiment' or a 'rating' column, plus a
	            'review_description' column for the word clouds. When 'sentiment'
	            is missing it is derived from 'rating' and added to ``data`` in
	            place.

	    Returns:
	        None. When a required column is missing, an error is shown in the app
	        and the function returns early.
	    """
	    # Derive the 'sentiment' column from 'rating' if it doesn't exist
	    if 'sentiment' not in data.columns:
	        if 'rating' not in data.columns:
	            st.error("The dataset does not contain a 'rating' or 'sentiment' column. Please check the data source.")
	            return
	        data['sentiment'] = data['rating'].apply(label_sentiment)

	    # Distribution of sentiments
	    st.subheader("Distribution of Sentiments")
	    sentiment_counts = data['sentiment'].value_counts()
	    if sentiment_counts.empty:
	        # Plotting an empty series fails, so report it instead.
	        st.info("No sentiment data available to plot.")
	        return
	    fig, ax = plt.subplots()
	    sentiment_counts.plot(kind='bar', ax=ax)
	    ax.set_title('Distribution of Sentiments')
	    ax.set_xlabel('Sentiment')
	    ax.set_ylabel('Count')
	    st.pyplot(fig)
	    # Close the figure so figures do not accumulate across Streamlit reruns.
	    plt.close(fig)

	    # Word cloud for each sentiment
	    st.subheader("Word Clouds for Sentiments")
	    if 'review_description' not in data.columns:
	        st.error("The dataset does not contain a 'review_description' column, so word clouds cannot be generated.")
	        return
	    for sentiment in data['sentiment'].dropna().unique():
	        st.write(f"Word Cloud for {sentiment}")
	        reviews = data.loc[data['sentiment'] == sentiment, 'review_description']
	        # Skip missing reviews and coerce the rest to str before cleaning.
	        text = " ".join(process_review(str(review)) for review in reviews.dropna())
	        try:
	            wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
	        except ValueError:
	            # WordCloud raises ValueError when the text yields no usable words.
	            st.info(f"Not enough review text to build a word cloud for {sentiment}.")
	            continue
	        # Draw on an explicit Axes rather than the global pyplot state.
	        fig, ax = plt.subplots()
	        ax.imshow(wordcloud, interpolation="bilinear")
	        ax.axis("off")
	        st.pyplot(fig)
	        plt.close(fig)