import streamlit as st import seaborn as sns import matplotlib.pyplot as plt import pandas as pd class LLaMAScoreAnalyzer: def __init__(self): self.languages = ["Nepali", "Hindi"] self.models = ["Baseline", "LoRA"] self.scores_gpt = ["relevance_score", "cc_score", "syntax_score", "complete_score"] self.rouge_bleu = ["rougeL", "bleu"] self.categories = ["hallucination_type"] self.DATA_PATH = { "Nepali": {"Baseline": "./data/nepali_baseline_all_scores.csv", "LoRA": "./data/nepali_lora_all_scores.csv"}, "Hindi": {"Baseline": "./data/hindi_baseline_all_scores.csv", "LoRA": "./data/hindi_lora_all_scores.csv"} } def load_samples(self, lang): cols_to_show = ["instruction", "input", "output"] for model in self.DATA_PATH[lang]: df = pd.read_csv(self.DATA_PATH[lang][model]) df.rename({"output": "expected_output"}) df[model+"_Response"] = df["cleaned_response"] cols_to_show.append(model+"_Response") cols_to_show = cols_to_show + ["relevance_score", "cc_score", "syntax_score", "complete_score", "rougeL", "blue", "is_repeat", "hallucination_type"] df = df[[col for col in cols_to_show if col in df.columns]] st.write(df.sample(5)) def load_data(self, lang, model): df = pd.read_csv(self.DATA_PATH[lang][model]) df['Language'] = lang df['Model'] = model return df def draw_specific_plots(self, data, categories, x_variable, title): fig, ax = plt.subplots(figsize=(12, 6)) palette = sns.color_palette("pastel", len(categories) * len(data[x_variable].unique())) for i, category in enumerate(categories): for j, unique_value in enumerate(data[x_variable].unique()): subset = data[data[x_variable] == unique_value] sns.kdeplot(data=subset, x=category, fill=True, common_norm=False, alpha=0.5, ax=ax, color=palette[i * len(data[x_variable].unique()) + j], label=f"{category} ({unique_value})") ax.set_title(title, fontsize=16) ax.set_xlabel("Score", fontsize=12) ax.set_ylabel("Density", fontsize=12) ax.legend(title="Category (Language/Model)") return fig def draw_combined_density_plot(self, data, title): fig, ax = plt.subplots(figsize=(12, 8)) palette = sns.color_palette("pastel", len(self.scores_gpt)) for i, category in enumerate(self.scores_gpt): sns.kdeplot(data=data, x=category, fill=True, common_norm=False, alpha=0.5, ax=ax, label=category, color=palette[i]) ax.set_title(title, fontsize=16) ax.set_xlabel("Score", fontsize=12) ax.set_ylabel("Density", fontsize=12) ax.legend(title="Score Categories") return fig def draw_bar_plot(self, data, categories, x_variable, title): fig, axs = plt.subplots(len(categories), 1, figsize=(10, 6 * len(categories))) palette = sns.color_palette("pastel", len(data)) if len(categories) == 1: axs = [axs] # Ensure axs is iterable even for a single plot for i, category in enumerate(categories): sns.countplot(data=data, x=category, hue=x_variable, palette=palette, ax=axs[i]) axs[i].set_title(f"Distribution of {category} for {title}", fontsize=16) axs[i].set_xlabel(category, fontsize=12) axs[i].set_ylabel("Count", fontsize=12) axs[i].legend(title=x_variable) plt.tight_layout() return fig def score_analyzer(self): st.sidebar.markdown(""" This App was created as a part of the project: "Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi" """) st.title("Findings from Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hind! ") st.markdown(""" Full post here: """) show_samples = st.sidebar.checkbox("Show Sample Data", value=False) detailed_view = st.sidebar.checkbox("Enable Detailed Charts View", value=False) selected_languages = st.sidebar.multiselect("Select Languages", self.languages, default="Nepali") selected_gpt_scoring = st.sidebar.multiselect("Select Score Category", self.scores_gpt, default="relevance_score") selected_models = st.sidebar.multiselect("Select Models", self.models, default="Baseline") dfs = [] for lang in selected_languages: for model in selected_models: df = self.load_data(lang, model) dfs.append(df) if show_samples: for lang in selected_languages: st.write(f"Sample data for {lang}") self.load_samples(lang) combined_data = pd.concat(dfs, ignore_index=True) if detailed_view: for language in selected_languages: language_data = combined_data[combined_data['Language'] == language] title = f"Distribution of Scores for {language}" fig = self.draw_specific_plots(language_data, selected_gpt_scoring, 'Model', title) st.pyplot(fig) if len(selected_languages) > 1: for model in selected_models: model_data = combined_data[combined_data['Model'] == model] title = f"Distribution of Scores for {model}" fig = self.draw_specific_plots(model_data, selected_gpt_scoring, 'Language', title) st.pyplot(fig) st.sidebar.markdown(""" Show additional evaluation scores and categories below: """) additional_score_categories = st.sidebar.checkbox("Hallucination Statistics", value=False) if additional_score_categories: additional_categories = st.sidebar.multiselect("Select Category", self.categories, default="hallucination_type") for language in selected_languages: language_data = combined_data[combined_data['Language'] == language] title = f"{language}" fig = self.draw_bar_plot(language_data, additional_categories, 'Model', title) st.pyplot(fig) if len(selected_languages) > 1: for model in selected_models: model_data = combined_data[combined_data['Model'] == model] title = f"{model}" fig = self.draw_bar_plot(model_data, additional_categories, 'Language', title) st.pyplot(fig) else: for language in selected_languages: for model in selected_models: title = f"Distribution of Scores for Different Evaluation Criterias for {language} [{model} Model]" fig = self.draw_combined_density_plot(combined_data[(combined_data['Language'] == language) & (combined_data['Model'] == model)], title) st.pyplot(fig) def main(): st.sidebar.header("Findings from Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi!") analyzer = LLaMAScoreAnalyzer() analyzer.score_analyzer() if __name__ == "__main__": main()