shreeyad committed on
Commit
d043bf5
·
1 Parent(s): ab7200a

update plots - add barplot

Browse files
Files changed (2) hide show
  1. app.py +36 -33
  2. data/hindi_baseline_all_scores.csv +0 -0
app.py CHANGED
@@ -18,8 +18,6 @@ class LLaMAScoreAnalyzer:
18
  }
19
 
20
  def load_samples(self, lang):
21
- # Show samples for data for selected languages
22
- # st.write(data.sample(5))
23
  cols_to_show = ["instruction", "input", "output"]
24
  for model in self.DATA_PATH[lang]:
25
  df = pd.read_csv(self.DATA_PATH[lang][model])
@@ -40,7 +38,7 @@ class LLaMAScoreAnalyzer:
40
  def draw_specific_plots(self, data, categories, x_variable, title):
41
  fig, ax = plt.subplots(figsize=(12, 6))
42
 
43
- palette = sns.color_palette("tab10", len(categories) * len(data[x_variable].unique()))
44
 
45
  for i, category in enumerate(categories):
46
  for j, unique_value in enumerate(data[x_variable].unique()):
@@ -59,7 +57,7 @@ class LLaMAScoreAnalyzer:
59
  def draw_combined_density_plot(self, data, title):
60
  fig, ax = plt.subplots(figsize=(12, 8))
61
 
62
- palette = sns.color_palette("tab10", len(self.scores_gpt))
63
 
64
  for i, category in enumerate(self.scores_gpt):
65
  sns.kdeplot(data=data, x=category, fill=True, common_norm=False, alpha=0.5, ax=ax, label=category, color=palette[i])
@@ -70,8 +68,25 @@ class LLaMAScoreAnalyzer:
70
  ax.legend(title="Score Categories")
71
 
72
  return fig
73
-
 
 
 
 
 
 
 
74
 
 
 
 
 
 
 
 
 
 
 
75
  def score_analyzer(self):
76
  st.sidebar.markdown("""
77
  This App was created as a part of the project: "Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi"
@@ -101,14 +116,15 @@ class LLaMAScoreAnalyzer:
101
  if detailed_view:
102
  for language in selected_languages:
103
  language_data = combined_data[combined_data['Language'] == language]
104
- title = f"Distribution of Scores Across Models for {language}"
105
  fig = self.draw_specific_plots(language_data, selected_gpt_scoring, 'Model', title)
106
  st.pyplot(fig)
107
- for model in selected_models:
108
- model_data = combined_data[combined_data['Model'] == model]
109
- title = f"Distribution of Scores Across Languages for {model}"
110
- fig = self.draw_specific_plots(model_data, selected_gpt_scoring, 'Language', title)
111
- st.pyplot(fig)
 
112
 
113
  st.sidebar.markdown("""
114
  Show additional evaluation scores and categories below:
@@ -116,34 +132,21 @@ class LLaMAScoreAnalyzer:
116
  additional_score_categories = st.sidebar.checkbox("Hallucination and Instruction Repeat Statistics", value=False)
117
  if additional_score_categories:
118
  additional_categories = st.sidebar.multiselect("Select Category", self.categories, default="hallucination_type")
119
- # for language in selected_languages:
120
- # language_data = combined_data[combined_data['Language'] == language]
121
- # title = f"Distribution of Scores Across Models for {language}"
122
- # fig = self.draw_specific_plots(language_data, additional_categories, 'Model', title)
123
- # st.pyplot(fig)
124
- # for model in selected_models:
125
- # model_data = combined_data[combined_data['Model'] == model]
126
- # title = f"Distribution of Scores Across Languages for {model}"
127
- # fig = self.draw_specific_plots(model_data, additional_categories, 'Language', title)
128
- # st.pyplot(fig)
129
-
130
- rouge_bleu_score = st.sidebar.checkbox("Rouge and BLEU Scores", value=False)
131
- if rouge_bleu_score:
132
- rouge_bleu_scores = st.sidebar.multiselect("Select Category", self.rouge_bleu, default="rougeL")
133
  for language in selected_languages:
134
  language_data = combined_data[combined_data['Language'] == language]
135
- title = f"Distribution of Scores Across Models for {language}"
136
- fig = self.draw_specific_plots(language_data, rouge_bleu_scores, 'Model', title)
137
- st.pyplot(fig)
138
- for model in selected_models:
139
- model_data = combined_data[combined_data['Model'] == model]
140
- title = f"Distribution of Scores Across Languages for {model}"
141
- fig = self.draw_specific_plots(model_data, rouge_bleu_scores, 'Language', title)
142
  st.pyplot(fig)
 
 
 
 
 
 
143
  else:
144
  for language in selected_languages:
145
  for model in selected_models:
146
- title = f"Combined Density of All Categories for {language} - {model}"
147
  fig = self.draw_combined_density_plot(combined_data[(combined_data['Language'] == language) &
148
  (combined_data['Model'] == model)], title)
149
  st.pyplot(fig)
 
18
  }
19
 
20
  def load_samples(self, lang):
 
 
21
  cols_to_show = ["instruction", "input", "output"]
22
  for model in self.DATA_PATH[lang]:
23
  df = pd.read_csv(self.DATA_PATH[lang][model])
 
38
  def draw_specific_plots(self, data, categories, x_variable, title):
39
  fig, ax = plt.subplots(figsize=(12, 6))
40
 
41
+ palette = sns.color_palette("pastel", len(categories) * len(data[x_variable].unique()))
42
 
43
  for i, category in enumerate(categories):
44
  for j, unique_value in enumerate(data[x_variable].unique()):
 
57
  def draw_combined_density_plot(self, data, title):
58
  fig, ax = plt.subplots(figsize=(12, 8))
59
 
60
+ palette = sns.color_palette("pastel", len(self.scores_gpt))
61
 
62
  for i, category in enumerate(self.scores_gpt):
63
  sns.kdeplot(data=data, x=category, fill=True, common_norm=False, alpha=0.5, ax=ax, label=category, color=palette[i])
 
68
  ax.legend(title="Score Categories")
69
 
70
  return fig
71
+
72
def draw_bar_plot(self, data, categories, x_variable, title):
    """Draw one count plot per category, stacked vertically in one figure.

    Args:
        data: Table handed to ``sns.countplot``; it must contain every
            column named in ``categories`` as well as the ``x_variable``
            column (callers appear to pass a filtered pandas DataFrame --
            confirm against the call sites).
        categories: Column names to plot; each gets its own subplot row.
        x_variable: Column used as the ``hue`` grouping and as the legend
            title of every subplot.
        title: Text appended to each subplot title
            (``"Distribution of <category> for <title>"``).

    Returns:
        The matplotlib figure holding the subplots. If ``categories`` is
        empty, a blank figure is returned instead of raising.
    """
    n_plots = len(categories)
    if n_plots == 0:
        # A zero-row grid (and a zero-height figsize) is not a valid subplot
        # layout, so hand back an empty figure the caller can still render.
        return plt.figure()

    # squeeze=False always yields a 2-D array of axes, so the single-category
    # case needs no special handling.
    fig, axs = plt.subplots(n_plots, 1, figsize=(10, 6 * n_plots), squeeze=False)

    for ax, category in zip(axs[:, 0], categories):
        # Pass the palette by name: the colours are applied to the hue levels
        # of ``x_variable``, so seaborn must size the palette to those levels
        # rather than to the number of categories being plotted.
        sns.countplot(data=data, x=category, hue=x_variable, palette="pastel", ax=ax)
        ax.set_title(f"Distribution of {category} for {title}", fontsize=16)
        ax.set_xlabel(category, fontsize=12)
        ax.set_ylabel("Count", fontsize=12)
        ax.legend(title=x_variable)

    # Lay out this figure explicitly instead of relying on pyplot's implicit
    # "current figure".
    fig.tight_layout()
    return fig
89
+
90
  def score_analyzer(self):
91
  st.sidebar.markdown("""
92
  This App was created as a part of the project: "Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi"
 
116
  if detailed_view:
117
  for language in selected_languages:
118
  language_data = combined_data[combined_data['Language'] == language]
119
+ title = f"Distribution of {selected_gpt_scoring} for {language}"
120
  fig = self.draw_specific_plots(language_data, selected_gpt_scoring, 'Model', title)
121
  st.pyplot(fig)
122
+ if len(selected_languages) > 1:
123
+ for model in selected_models:
124
+ model_data = combined_data[combined_data['Model'] == model]
125
+ title = f"Distribution of {selected_gpt_scoring} for {model}"
126
+ fig = self.draw_specific_plots(model_data, selected_gpt_scoring, 'Language', title)
127
+ st.pyplot(fig)
128
 
129
  st.sidebar.markdown("""
130
  Show additional evaluation scores and categories below:
 
132
  additional_score_categories = st.sidebar.checkbox("Hallucination and Instruction Repeat Statistics", value=False)
133
  if additional_score_categories:
134
  additional_categories = st.sidebar.multiselect("Select Category", self.categories, default="hallucination_type")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  for language in selected_languages:
136
  language_data = combined_data[combined_data['Language'] == language]
137
+ title = f"{language}"
138
+ fig = self.draw_bar_plot(language_data, additional_categories, 'Model', title)
 
 
 
 
 
139
  st.pyplot(fig)
140
+ if len(selected_languages) > 1:
141
+ for model in selected_models:
142
+ model_data = combined_data[combined_data['Model'] == model]
143
+ title = f"{model}"
144
+ fig = self.draw_bar_plot(model_data, additional_categories, 'Language', title)
145
+ st.pyplot(fig)
146
  else:
147
  for language in selected_languages:
148
  for model in selected_models:
149
+ title = f"Distribution of Scores for Different Evaluation Criterias for {language} [{model} Model]"
150
  fig = self.draw_combined_density_plot(combined_data[(combined_data['Language'] == language) &
151
  (combined_data['Model'] == model)], title)
152
  st.pyplot(fig)
data/hindi_baseline_all_scores.csv CHANGED
The diff for this file is too large to render. See raw diff