Spaces:

shreeyad
/

llama-3-scores

Sleeping

App Files Files Community

shreeyad committed on Aug 17, 2024

Commit

d043bf5

1 Parent(s): ab7200a

update plots - add barplot

Browse files

Files changed (2) hide show

app.py +36 -33
data/hindi_baseline_all_scores.csv +0 -0

app.py CHANGED Viewed

@@ -18,8 +18,6 @@ class LLaMAScoreAnalyzer:
             }
     def load_samples(self, lang):
-        # Show samples for data for selected languages
-        # st.write(data.sample(5))
         cols_to_show = ["instruction", "input", "output"]
         for model in self.DATA_PATH[lang]:
             df = pd.read_csv(self.DATA_PATH[lang][model])
@@ -40,7 +38,7 @@ class LLaMAScoreAnalyzer:
     def draw_specific_plots(self, data, categories, x_variable, title):
         fig, ax = plt.subplots(figsize=(12, 6))
-        palette = sns.color_palette("tab10", len(categories) * len(data[x_variable].unique()))
         for i, category in enumerate(categories):
             for j, unique_value in enumerate(data[x_variable].unique()):
@@ -59,7 +57,7 @@ class LLaMAScoreAnalyzer:
     def draw_combined_density_plot(self, data, title):
         fig, ax = plt.subplots(figsize=(12, 8))
-        palette = sns.color_palette("tab10", len(self.scores_gpt))
         for i, category in enumerate(self.scores_gpt):
             sns.kdeplot(data=data, x=category, fill=True, common_norm=False, alpha=0.5, ax=ax, label=category, color=palette[i])
@@ -70,8 +68,25 @@ class LLaMAScoreAnalyzer:
         ax.legend(title="Score Categories")
         return fig
     def score_analyzer(self):
         st.sidebar.markdown("""
                     This App was created as a part of the project: "Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi"
@@ -101,14 +116,15 @@ class LLaMAScoreAnalyzer:
         if detailed_view:
             for language in selected_languages:
                 language_data = combined_data[combined_data['Language'] == language]
-                title = f"Distribution of Scores Across Models for {language}"
                 fig = self.draw_specific_plots(language_data, selected_gpt_scoring, 'Model', title)
                 st.pyplot(fig)
-            for model in selected_models:
-                model_data = combined_data[combined_data['Model'] == model]
-                title = f"Distribution of Scores Across Languages for {model}"
-                fig = self.draw_specific_plots(model_data, selected_gpt_scoring, 'Language', title)
-                st.pyplot(fig)
             st.sidebar.markdown("""
                     Show additional evaluation scores and categories below:
@@ -116,34 +132,21 @@ class LLaMAScoreAnalyzer:
             additional_score_categories = st.sidebar.checkbox("Hallucination and Instruction Repeat Statistics", value=False)
             if additional_score_categories:
                 additional_categories = st.sidebar.multiselect("Select Category", self.categories, default="hallucination_type")
-                # for language in selected_languages:
-                #     language_data = combined_data[combined_data['Language'] == language]
-                #     title = f"Distribution of Scores Across Models for {language}"
-                #     fig = self.draw_specific_plots(language_data, additional_categories, 'Model', title)
-                #     st.pyplot(fig)
-                # for model in selected_models:
-                #     model_data = combined_data[combined_data['Model'] == model]
-                #     title = f"Distribution of Scores Across Languages for {model}"
-                #     fig = self.draw_specific_plots(model_data, additional_categories, 'Language', title)
-                #     st.pyplot(fig)
-            rouge_bleu_score = st.sidebar.checkbox("Rouge and BLEU Scores", value=False)
-            if rouge_bleu_score:
-                rouge_bleu_scores = st.sidebar.multiselect("Select Category", self.rouge_bleu, default="rougeL")
                 for language in selected_languages:
                     language_data = combined_data[combined_data['Language'] == language]
-                    title = f"Distribution of Scores Across Models for {language}"
-                    fig = self.draw_specific_plots(language_data, rouge_bleu_scores, 'Model', title)
-                    st.pyplot(fig)
-                for model in selected_models:
-                    model_data = combined_data[combined_data['Model'] == model]
-                    title = f"Distribution of Scores Across Languages for {model}"
-                    fig = self.draw_specific_plots(model_data, rouge_bleu_scores, 'Language', title)
                     st.pyplot(fig)
         else:
            for language in selected_languages:
                 for model in selected_models:
-                    title = f"Combined Density of All Categories for {language} - {model}"
                     fig = self.draw_combined_density_plot(combined_data[(combined_data['Language'] == language) &
                                                                         (combined_data['Model'] == model)], title)
                     st.pyplot(fig)

             }
     def load_samples(self, lang):
         cols_to_show = ["instruction", "input", "output"]
         for model in self.DATA_PATH[lang]:
             df = pd.read_csv(self.DATA_PATH[lang][model])
     def draw_specific_plots(self, data, categories, x_variable, title):
         fig, ax = plt.subplots(figsize=(12, 6))
+        palette = sns.color_palette("pastel", len(categories) * len(data[x_variable].unique()))
         for i, category in enumerate(categories):
             for j, unique_value in enumerate(data[x_variable].unique()):
     def draw_combined_density_plot(self, data, title):
         fig, ax = plt.subplots(figsize=(12, 8))
+        palette = sns.color_palette("pastel", len(self.scores_gpt))
         for i, category in enumerate(self.scores_gpt):
             sns.kdeplot(data=data, x=category, fill=True, common_norm=False, alpha=0.5, ax=ax, label=category, color=palette[i])
         ax.legend(title="Score Categories")
         return fig
+    def draw_bar_plot(self, data, categories, x_variable, title):
+        fig, axs = plt.subplots(len(categories), 1, figsize=(10, 6 * len(categories)))
+        palette = sns.color_palette("pastel", len(categories))
+        if len(categories) == 1:
+            axs = [axs]  # Ensure axs is iterable even for a single plot
+        for i, category in enumerate(categories):
+            sns.countplot(data=data, x=category, hue=x_variable, palette=palette, ax=axs[i])
+            axs[i].set_title(f"Distribution of {category} for {title}", fontsize=16)
+            axs[i].set_xlabel(category, fontsize=12)
+            axs[i].set_ylabel("Count", fontsize=12)
+            axs[i].legend(title=x_variable)
+        plt.tight_layout()
+        return fig
     def score_analyzer(self):
         st.sidebar.markdown("""
                     This App was created as a part of the project: "Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi"
         if detailed_view:
             for language in selected_languages:
                 language_data = combined_data[combined_data['Language'] == language]
+                title = f"Distribution of {selected_gpt_scoring}  for {language}"
                 fig = self.draw_specific_plots(language_data, selected_gpt_scoring, 'Model', title)
                 st.pyplot(fig)
+            if len(selected_languages) > 1:
+                for model in selected_models:
+                    model_data = combined_data[combined_data['Model'] == model]
+                    title = f"Distribution of {selected_gpt_scoring} for {model}"
+                    fig = self.draw_specific_plots(model_data, selected_gpt_scoring, 'Language', title)
+                    st.pyplot(fig)
             st.sidebar.markdown("""
                     Show additional evaluation scores and categories below:
             additional_score_categories = st.sidebar.checkbox("Hallucination and Instruction Repeat Statistics", value=False)
             if additional_score_categories:
                 additional_categories = st.sidebar.multiselect("Select Category", self.categories, default="hallucination_type")
                 for language in selected_languages:
                     language_data = combined_data[combined_data['Language'] == language]
+                    title = f"{language}"
+                    fig = self.draw_bar_plot(language_data, additional_categories, 'Model', title)
                     st.pyplot(fig)
+                if len(selected_languages) > 1:
+                    for model in selected_models:
+                        model_data = combined_data[combined_data['Model'] == model]
+                        title = f"{model}"
+                        fig = self.draw_bar_plot(model_data, additional_categories, 'Language', title)
+                        st.pyplot(fig)
         else:
            for language in selected_languages:
                 for model in selected_models:
+                    title = f"Distribution of Scores for Different Evaluation Criterias for {language} [{model} Model]"
                     fig = self.draw_combined_density_plot(combined_data[(combined_data['Language'] == language) &
                                                                         (combined_data['Model'] == model)], title)
                     st.pyplot(fig)

data/hindi_baseline_all_scores.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff