shresht8 committed on
Commit
71e3164
·
verified ·
1 Parent(s): 0294743

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +125 -20
  2. requirements.txt +2 -1
app.py CHANGED
@@ -2,6 +2,9 @@ import gradio as gr
2
  import pandas as pd
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
 
 
5
 
6
  # Load model and tokenizer globally for efficiency
7
  model_name = "tabularisai/multilingual-sentiment-analysis"
@@ -27,36 +30,128 @@ def predict_sentiment(texts):
27
  return [sentiment_map[p] for p in torch.argmax(probabilities, dim=-1).tolist()]
28
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  def process_file(file_obj):
31
  """
32
  Process the input file and add sentiment analysis results
33
  """
34
  try:
35
- # Read the file based on its extension
36
  file_path = file_obj.name
 
 
 
37
  if file_path.endswith('.csv'):
 
38
  df = pd.read_csv(file_path)
 
 
 
 
 
39
  elif file_path.endswith(('.xlsx', '.xls')):
40
- df = pd.read_excel(file_path)
 
 
 
 
 
 
 
41
  else:
42
  raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")
43
 
44
- # Verify that 'Reviews' column exists
45
- if 'Reviews' not in df.columns:
46
- raise ValueError("Input file must contain a 'Reviews' column.")
47
 
48
- # Perform sentiment analysis
49
- reviews = df['Reviews'].fillna("") # Handle any missing values
50
- sentiments = predict_sentiment(reviews.tolist())
 
 
 
51
 
52
- # Add results to the dataframe
53
- df['Sentiment'] = sentiments
54
 
55
- # Save the results to a new Excel file
56
- output_path = "output_with_sentiment.xlsx"
57
- df.to_excel(output_path, index=False)
58
-
59
- return df, output_path
 
60
 
61
  except Exception as e:
62
  raise gr.Error(str(e))
@@ -64,8 +159,12 @@ def process_file(file_obj):
64
 
65
  # Create Gradio interface
66
  with gr.Blocks() as interface:
67
- gr.Markdown("# Review Sentiment Analysis")
68
- gr.Markdown("Upload an Excel or CSV file with a 'Reviews' column to analyze sentiment.")
 
 
 
 
69
 
70
  with gr.Row():
71
  file_input = gr.File(
@@ -77,13 +176,19 @@ with gr.Blocks() as interface:
77
  analyze_btn = gr.Button("Analyze Sentiments")
78
 
79
  with gr.Row():
80
- output_df = gr.Dataframe(label="Results Preview")
81
- output_file = gr.File(label="Download Results")
 
 
 
 
 
 
82
 
83
  analyze_btn.click(
84
  fn=process_file,
85
  inputs=[file_input],
86
- outputs=[output_df, output_file]
87
  )
88
 
89
  # Launch the interface
 
2
  import pandas as pd
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
+ import plotly.express as px
6
+ import plotly.graph_objects as go
7
+ from collections import defaultdict
8
 
9
  # Load model and tokenizer globally for efficiency
10
  model_name = "tabularisai/multilingual-sentiment-analysis"
 
30
  return [sentiment_map[p] for p in torch.argmax(probabilities, dim=-1).tolist()]
31
 
32
 
33
def process_single_sheet(df, product_name):
    """
    Run sentiment analysis on the reviews of a single product.

    Args:
        df: DataFrame that must contain a 'Reviews' column.
        product_name: Label used in the error message when the column is
            missing (a sheet name, or a default name for CSV input).

    Returns:
        A tuple ``(result_df, sentiment_counts)``: ``result_df`` is a copy of
        ``df`` with an added 'Sentiment' column, and ``sentiment_counts`` is a
        Series giving the number of reviews per predicted sentiment label.

    Raises:
        ValueError: If ``df`` has no 'Reviews' column.
    """
    if 'Reviews' not in df.columns:
        raise ValueError(f"'Reviews' column not found in sheet/file for {product_name}")

    # Work on a copy so the caller's frame is not modified as a side effect.
    result_df = df.copy()

    # Missing cells become empty strings; every other cell is coerced to text.
    # NOTE(review): predict_sentiment presumably feeds these to a tokenizer
    # that only accepts strings, so numeric/date cells must not pass through
    # unchanged -- confirm against predict_sentiment.
    texts = ["" if pd.isna(value) else str(value) for value in result_df['Reviews']]

    sentiments = predict_sentiment(texts)
    result_df['Sentiment'] = sentiments

    # Per-label frequency of the predicted sentiments
    sentiment_counts = pd.Series(sentiments).value_counts()

    return result_df, sentiment_counts
48
+
49
+
50
def create_comparison_charts(sentiment_results):
    """
    Build comparison charts and a summary table for a set of products.

    Args:
        sentiment_results: Mapping of product name to a Series (anything with
            ``.items()``) of sentiment label -> number of reviews.

    Returns:
        A tuple ``(bar_fig, pie_fig, summary_df)``:
        - ``bar_fig``: plotly bar chart of review counts per product, coloured
          by sentiment.
        - ``pie_fig``: plotly pie chart of sentiment counts over all products.
        - ``summary_df``: DataFrame indexed by 'Product' with one integer
          column per sentiment label, plus 'Total Reviews' and
          'Positive Ratio' (percentage, rounded to 2 decimals).
    """
    # Flatten the nested mapping into one record per (product, sentiment)
    records = []
    for product, sentiment_counts in sentiment_results.items():
        for sentiment, count in sentiment_counts.items():
            records.append((product, sentiment, count))

    plot_df = pd.DataFrame(records, columns=['Product', 'Sentiment', 'Count'])

    # Create stacked bar chart
    bar_fig = px.bar(
        plot_df,
        x='Product',
        y='Count',
        color='Sentiment',
        title='Sentiment Distribution by Product',
        labels={'Count': 'Number of Reviews'},
        color_discrete_sequence=px.colors.qualitative.Set3,
    )

    # Create pie chart for overall sentiment distribution
    pie_fig = px.pie(
        plot_df,
        values='Count',
        names='Sentiment',
        title='Overall Sentiment Distribution',
        color_discrete_sequence=px.colors.qualitative.Set3,
    )

    # Create summary table. Counts are summed (not averaged, which is the
    # pivot_table default) and cast back to int so they stay whole numbers.
    summary_df = plot_df.pivot_table(
        values='Count',
        index='Product',
        columns='Sentiment',
        aggfunc='sum',
        fill_value=0,
    ).astype(int)

    # Add total reviews column
    summary_df['Total Reviews'] = summary_df.sum(axis=1)

    # Percentage of positive reviews (Positive + Very Positive); labels that
    # never occurred have no column, so only the ones present are summed.
    positive_cols = [col for col in ('Positive', 'Very Positive') if col in summary_df.columns]
    positive_total = summary_df[positive_cols].sum(axis=1)
    summary_df['Positive Ratio'] = (positive_total / summary_df['Total Reviews'] * 100).round(2)

    return bar_fig, pie_fig, summary_df
105
+
106
+
107
  def process_file(file_obj):
108
  """
109
  Process the input file and add sentiment analysis results
110
  """
111
  try:
 
112
  file_path = file_obj.name
113
+ sentiment_results = defaultdict(pd.Series)
114
+ all_processed_dfs = {}
115
+
116
  if file_path.endswith('.csv'):
117
+ # Process single CSV file
118
  df = pd.read_csv(file_path)
119
+ product_name = "Product" # Default name for CSV
120
+ processed_df, sentiment_counts = process_single_sheet(df, product_name)
121
+ all_processed_dfs[product_name] = processed_df
122
+ sentiment_results[product_name] = sentiment_counts
123
+
124
  elif file_path.endswith(('.xlsx', '.xls')):
125
+ # Process multiple sheets in Excel file
126
+ excel_file = pd.ExcelFile(file_path)
127
+
128
+ for sheet_name in excel_file.sheet_names:
129
+ df = pd.read_excel(file_path, sheet_name=sheet_name)
130
+ processed_df, sentiment_counts = process_single_sheet(df, sheet_name)
131
+ all_processed_dfs[sheet_name] = processed_df
132
+ sentiment_results[sheet_name] = sentiment_counts
133
  else:
134
  raise ValueError("Unsupported file format. Please upload a CSV or Excel file.")
135
 
136
+ # Create visualizations
137
+ bar_chart, pie_chart, summary_table = create_comparison_charts(sentiment_results)
 
138
 
139
+ # Save results to a new Excel file
140
+ output_path = "sentiment_analysis_results.xlsx"
141
+ with pd.ExcelWriter(output_path) as writer:
142
+ # Save processed data
143
+ for sheet_name, df in all_processed_dfs.items():
144
+ df.to_excel(writer, sheet_name=sheet_name, index=False)
145
 
146
+ # Save summary
147
+ summary_table.to_excel(writer, sheet_name='Summary', index=True)
148
 
149
+ return (
150
+ bar_chart,
151
+ pie_chart,
152
+ summary_table,
153
+ output_path
154
+ )
155
 
156
  except Exception as e:
157
  raise gr.Error(str(e))
 
159
 
160
  # Create Gradio interface
161
  with gr.Blocks() as interface:
162
+ gr.Markdown("# Multi-Product Review Sentiment Analysis")
163
+ gr.Markdown("""
164
+ Upload a file to analyze sentiments:
165
+ - For CSV: Single product reviews with 'Reviews' column
166
+ - For Excel: Multiple sheets, each named after the product, with 'Reviews' column
167
+ """)
168
 
169
  with gr.Row():
170
  file_input = gr.File(
 
176
  analyze_btn = gr.Button("Analyze Sentiments")
177
 
178
  with gr.Row():
179
+ bar_plot = gr.Plot(label="Sentiment Distribution by Product")
180
+ pie_plot = gr.Plot(label="Overall Sentiment Distribution")
181
+
182
+ with gr.Row():
183
+ summary_table = gr.Dataframe(label="Summary Statistics")
184
+
185
+ with gr.Row():
186
+ output_file = gr.File(label="Download Detailed Results")
187
 
188
  analyze_btn.click(
189
  fn=process_file,
190
  inputs=[file_input],
191
+ outputs=[bar_plot, pie_plot, summary_table, output_file]
192
  )
193
 
194
  # Launch the interface
requirements.txt CHANGED
@@ -2,4 +2,5 @@ transformers
2
  openpyxl
3
  torch
4
  pandas
5
- gradio
 
 
2
  openpyxl
3
  torch
4
  pandas
5
+ gradio
6
+ plotly