Spaces:
Sleeping
Sleeping
update plots - add barplot
Browse files
- app.py +36 -33
- data/hindi_baseline_all_scores.csv +0 -0
app.py
CHANGED
@@ -18,8 +18,6 @@ class LLaMAScoreAnalyzer:
|
|
18 |
}
|
19 |
|
20 |
def load_samples(self, lang):
|
21 |
-
# Show samples for data for selected languages
|
22 |
-
# st.write(data.sample(5))
|
23 |
cols_to_show = ["instruction", "input", "output"]
|
24 |
for model in self.DATA_PATH[lang]:
|
25 |
df = pd.read_csv(self.DATA_PATH[lang][model])
|
@@ -40,7 +38,7 @@ class LLaMAScoreAnalyzer:
|
|
40 |
def draw_specific_plots(self, data, categories, x_variable, title):
|
41 |
fig, ax = plt.subplots(figsize=(12, 6))
|
42 |
|
43 |
-
palette = sns.color_palette("
|
44 |
|
45 |
for i, category in enumerate(categories):
|
46 |
for j, unique_value in enumerate(data[x_variable].unique()):
|
@@ -59,7 +57,7 @@ class LLaMAScoreAnalyzer:
|
|
59 |
def draw_combined_density_plot(self, data, title):
|
60 |
fig, ax = plt.subplots(figsize=(12, 8))
|
61 |
|
62 |
-
palette = sns.color_palette("
|
63 |
|
64 |
for i, category in enumerate(self.scores_gpt):
|
65 |
sns.kdeplot(data=data, x=category, fill=True, common_norm=False, alpha=0.5, ax=ax, label=category, color=palette[i])
|
@@ -70,8 +68,25 @@ class LLaMAScoreAnalyzer:
|
|
70 |
ax.legend(title="Score Categories")
|
71 |
|
72 |
return fig
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
def score_analyzer(self):
|
76 |
st.sidebar.markdown("""
|
77 |
This App was created as a part of the project: "Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi"
|
@@ -101,14 +116,15 @@ class LLaMAScoreAnalyzer:
|
|
101 |
if detailed_view:
|
102 |
for language in selected_languages:
|
103 |
language_data = combined_data[combined_data['Language'] == language]
|
104 |
-
title = f"Distribution of
|
105 |
fig = self.draw_specific_plots(language_data, selected_gpt_scoring, 'Model', title)
|
106 |
st.pyplot(fig)
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
|
|
112 |
|
113 |
st.sidebar.markdown("""
|
114 |
Show additional evaluation scores and categories below:
|
@@ -116,34 +132,21 @@ class LLaMAScoreAnalyzer:
|
|
116 |
additional_score_categories = st.sidebar.checkbox("Hallucination and Instruction Repeat Statistics", value=False)
|
117 |
if additional_score_categories:
|
118 |
additional_categories = st.sidebar.multiselect("Select Category", self.categories, default="hallucination_type")
|
119 |
-
# for language in selected_languages:
|
120 |
-
# language_data = combined_data[combined_data['Language'] == language]
|
121 |
-
# title = f"Distribution of Scores Across Models for {language}"
|
122 |
-
# fig = self.draw_specific_plots(language_data, additional_categories, 'Model', title)
|
123 |
-
# st.pyplot(fig)
|
124 |
-
# for model in selected_models:
|
125 |
-
# model_data = combined_data[combined_data['Model'] == model]
|
126 |
-
# title = f"Distribution of Scores Across Languages for {model}"
|
127 |
-
# fig = self.draw_specific_plots(model_data, additional_categories, 'Language', title)
|
128 |
-
# st.pyplot(fig)
|
129 |
-
|
130 |
-
rouge_bleu_score = st.sidebar.checkbox("Rouge and BLEU Scores", value=False)
|
131 |
-
if rouge_bleu_score:
|
132 |
-
rouge_bleu_scores = st.sidebar.multiselect("Select Category", self.rouge_bleu, default="rougeL")
|
133 |
for language in selected_languages:
|
134 |
language_data = combined_data[combined_data['Language'] == language]
|
135 |
-
title = f"
|
136 |
-
fig = self.
|
137 |
-
st.pyplot(fig)
|
138 |
-
for model in selected_models:
|
139 |
-
model_data = combined_data[combined_data['Model'] == model]
|
140 |
-
title = f"Distribution of Scores Across Languages for {model}"
|
141 |
-
fig = self.draw_specific_plots(model_data, rouge_bleu_scores, 'Language', title)
|
142 |
st.pyplot(fig)
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
else:
|
144 |
for language in selected_languages:
|
145 |
for model in selected_models:
|
146 |
-
title = f"
|
147 |
fig = self.draw_combined_density_plot(combined_data[(combined_data['Language'] == language) &
|
148 |
(combined_data['Model'] == model)], title)
|
149 |
st.pyplot(fig)
|
|
|
18 |
}
|
19 |
|
20 |
def load_samples(self, lang):
|
|
|
|
|
21 |
cols_to_show = ["instruction", "input", "output"]
|
22 |
for model in self.DATA_PATH[lang]:
|
23 |
df = pd.read_csv(self.DATA_PATH[lang][model])
|
|
|
38 |
def draw_specific_plots(self, data, categories, x_variable, title):
|
39 |
fig, ax = plt.subplots(figsize=(12, 6))
|
40 |
|
41 |
+
palette = sns.color_palette("pastel", len(categories) * len(data[x_variable].unique()))
|
42 |
|
43 |
for i, category in enumerate(categories):
|
44 |
for j, unique_value in enumerate(data[x_variable].unique()):
|
|
|
57 |
def draw_combined_density_plot(self, data, title):
|
58 |
fig, ax = plt.subplots(figsize=(12, 8))
|
59 |
|
60 |
+
palette = sns.color_palette("pastel", len(self.scores_gpt))
|
61 |
|
62 |
for i, category in enumerate(self.scores_gpt):
|
63 |
sns.kdeplot(data=data, x=category, fill=True, common_norm=False, alpha=0.5, ax=ax, label=category, color=palette[i])
|
|
|
68 |
ax.legend(title="Score Categories")
|
69 |
|
70 |
return fig
|
71 |
+
|
72 |
+
def draw_bar_plot(self, data, categories, x_variable, title):
    """Draw one count plot per category, stacked vertically in one figure.

    Args:
        data: Tabular data handed to ``sns.countplot``. It must contain
            every column named in ``categories`` plus the ``x_variable``
            column (the visible callers pass a filtered DataFrame).
        categories: Column names to plot; each one gets its own subplot.
        x_variable: Column used as the ``hue`` grouping and legend title.
        title: Text appended to every subplot title.

    Returns:
        The matplotlib figure containing the subplots. When ``categories``
        is empty, a single blank figure with a short notice is returned.
    """
    if not categories:
        # plt.subplots() refuses a zero-row grid, so an empty selection
        # would otherwise raise instead of rendering anything.
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.set_axis_off()
        ax.text(
            0.5,
            0.5,
            "No categories selected",
            ha="center",
            va="center",
            transform=ax.transAxes,
        )
        return fig

    # squeeze=False always yields a 2-D array of axes, so the single-category
    # case needs no special handling.
    fig, axs = plt.subplots(
        len(categories),
        1,
        figsize=(10, 6 * len(categories)),
        squeeze=False,
    )

    for ax, category in zip(axs[:, 0], categories):
        # Colours are assigned per hue level (values of x_variable), not per
        # category, so let seaborn size the "pastel" palette to the hue
        # levels instead of passing a list sized by len(categories).
        sns.countplot(
            data=data,
            x=category,
            hue=x_variable,
            palette="pastel",
            ax=ax,
        )
        ax.set_title(f"Distribution of {category} for {title}", fontsize=16)
        ax.set_xlabel(category, fontsize=12)
        ax.set_ylabel("Count", fontsize=12)
        ax.legend(title=x_variable)

    # Lay out this specific figure rather than whichever one pyplot
    # currently considers active.
    fig.tight_layout()
    return fig
|
89 |
+
|
90 |
def score_analyzer(self):
|
91 |
st.sidebar.markdown("""
|
92 |
This App was created as a part of the project: "Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi"
|
|
|
116 |
if detailed_view:
|
117 |
for language in selected_languages:
|
118 |
language_data = combined_data[combined_data['Language'] == language]
|
119 |
+
title = f"Distribution of {selected_gpt_scoring} for {language}"
|
120 |
fig = self.draw_specific_plots(language_data, selected_gpt_scoring, 'Model', title)
|
121 |
st.pyplot(fig)
|
122 |
+
if len(selected_languages) > 1:
|
123 |
+
for model in selected_models:
|
124 |
+
model_data = combined_data[combined_data['Model'] == model]
|
125 |
+
title = f"Distribution of {selected_gpt_scoring} for {model}"
|
126 |
+
fig = self.draw_specific_plots(model_data, selected_gpt_scoring, 'Language', title)
|
127 |
+
st.pyplot(fig)
|
128 |
|
129 |
st.sidebar.markdown("""
|
130 |
Show additional evaluation scores and categories below:
|
|
|
132 |
additional_score_categories = st.sidebar.checkbox("Hallucination and Instruction Repeat Statistics", value=False)
|
133 |
if additional_score_categories:
|
134 |
additional_categories = st.sidebar.multiselect("Select Category", self.categories, default="hallucination_type")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
for language in selected_languages:
|
136 |
language_data = combined_data[combined_data['Language'] == language]
|
137 |
+
title = f"{language}"
|
138 |
+
fig = self.draw_bar_plot(language_data, additional_categories, 'Model', title)
|
|
|
|
|
|
|
|
|
|
|
139 |
st.pyplot(fig)
|
140 |
+
if len(selected_languages) > 1:
|
141 |
+
for model in selected_models:
|
142 |
+
model_data = combined_data[combined_data['Model'] == model]
|
143 |
+
title = f"{model}"
|
144 |
+
fig = self.draw_bar_plot(model_data, additional_categories, 'Language', title)
|
145 |
+
st.pyplot(fig)
|
146 |
else:
|
147 |
for language in selected_languages:
|
148 |
for model in selected_models:
|
149 |
+
title = f"Distribution of Scores for Different Evaluation Criterias for {language} [{model} Model]"
|
150 |
fig = self.draw_combined_density_plot(combined_data[(combined_data['Language'] == language) &
|
151 |
(combined_data['Model'] == model)], title)
|
152 |
st.pyplot(fig)
|
data/hindi_baseline_all_scores.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|