Kurkur99 committed on
Commit
0a2d8ac
·
1 Parent(s): 1f04d3e

Update eda.py

Browse files
Files changed (1) hide show
  1. eda.py +8 -1
eda.py CHANGED
@@ -2,6 +2,7 @@ import streamlit as st
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
  from wordcloud import WordCloud
 
5
 
6
  def label_sentiment(rating):
7
  """Label sentiment based on the rating."""
@@ -14,6 +15,12 @@ def label_sentiment(rating):
14
  else:
15
  return 'unknown'
16
 
 
 
 
 
 
 
17
  def display_eda(data):
18
  # Derive the 'sentiment' column from 'rating' if it doesn't exist
19
  if 'sentiment' not in data.columns:
@@ -39,7 +46,7 @@ def display_eda(data):
39
  for sentiment in sentiments:
40
  st.write(f"Word Cloud for {sentiment}")
41
  subset = data[data['sentiment'] == sentiment]
42
- text = " ".join(review for review in subset['processed_review'])
43
  wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
44
  plt.figure()
45
  plt.imshow(wordcloud, interpolation="bilinear")
 
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
  from wordcloud import WordCloud
5
+ import re
6
 
7
  def label_sentiment(rating):
8
  """Label sentiment based on the rating."""
 
15
  else:
16
  return 'unknown'
17
 
18
+ def process_review(review):
19
+ """Lowercase the review text and strip every character other than a-z and whitespace."""
20
+ review = review.lower()
21
+ review = re.sub(r'[^a-z\s]', '', review) # Drops digits, punctuation and non-ASCII letters; only a-z and whitespace remain
22
+ return review
23
+
24
  def display_eda(data):
25
  # Derive the 'sentiment' column from 'rating' if it doesn't exist
26
  if 'sentiment' not in data.columns:
 
46
  for sentiment in sentiments:
47
  st.write(f"Word Cloud for {sentiment}")
48
  subset = data[data['sentiment'] == sentiment]
49
+ text = " ".join(process_review(review) for review in subset['review'])
50
  wordcloud = WordCloud(max_words=100, background_color="white").generate(text)
51
  plt.figure()
52
  plt.imshow(wordcloud, interpolation="bilinear")