Spaces:

shreeyad
/

llama-3-scores

Sleeping

App Files Files Community

llama-3-scores / app.py

shreeyad

fix hindi lora path

fa3f4d0 4 months ago

raw

history blame

7.61 kB

	import streamlit as st

	import seaborn as sns
	import matplotlib.pyplot as plt
	import pandas as pd


	class LLaMAScoreAnalyzer:
	    """Streamlit dashboard comparing evaluation scores of LLaMA 3 model variants.

	    Reads one CSV of scores per (language, model) pair and renders density
	    plots of the score columns plus optional count plots of the categorical
	    columns.

	    NOTE(review): the indentation of this file was lost in the copy under
	    review, so the block nesting here is reconstructed from the control
	    flow -- confirm against the deployed app.
	    """

	    # Number of random rows shown by the "Show Sample Data" preview.
	    SAMPLE_SIZE = 5

	    def __init__(self):
	        """Define the selectable options and the CSV path of every language/model pair."""
	        self.languages = ["Nepali", "Hindi"]
	        self.models = ["Baseline", "LoRA"]
	        self.scores_gpt = ["relevance_score", "cc_score", "syntax_score", "complete_score"]
	        self.rouge_bleu = ["rougeL", "bleu"]
	        self.categories = ["hallucination_type"]
	        self.DATA_PATH = {
	            "Nepali": {
	                "Baseline": "./data/nepali_baseline_all_scores.csv",
	                "LoRA": "./data/nepali_lora_all_scores.csv",
	            },
	            "Hindi": {
	                "Baseline": "./data/hindi_baseline_all_scores.csv",
	                "LoRA": "./data/hindi_lora_all_scores.csv",
	            },
	        }

	    def _build_sample_frame(self, lang):
	        """Return the columns of interest from the score CSVs of ``lang``.

	        Every CSV registered for the language is read, but each read replaces
	        the previous frame, so the result holds the rows of the last model in
	        ``DATA_PATH[lang]`` only.
	        NOTE(review): a side-by-side view of all models was presumably
	        intended; that needs a join key the CSVs are not known to share.

	        Raises:
	            KeyError: if a CSV has no ``cleaned_response`` column.
	        """
	        cols_to_show = ["instruction", "input", "expected_output"]
	        df = pd.DataFrame()
	        for model, path in self.DATA_PATH[lang].items():
	            df = pd.read_csv(path)
	            # rename() returns a new frame and maps index labels unless
	            # columns= is given, so the result has to be kept.
	            df = df.rename(columns={"output": "expected_output"})
	            response_col = f"{model}_Response"
	            df[response_col] = df["cleaned_response"]
	            cols_to_show.append(response_col)
	        # Reuse the attribute lists so the column names cannot drift apart.
	        cols_to_show += self.scores_gpt + self.rouge_bleu + ["is_repeat"] + self.categories
	        return df[[col for col in cols_to_show if col in df.columns]]

	    def load_samples(self, lang):
	        """Write up to ``SAMPLE_SIZE`` random rows for ``lang`` to the page."""
	        df = self._build_sample_frame(lang)
	        # sample(n) raises ValueError when n exceeds the number of rows.
	        st.write(df.sample(min(self.SAMPLE_SIZE, len(df))))

	    def load_data(self, lang, model):
	        """Read the CSV of one language/model pair and tag each row with both.

	        Returns:
	            The CSV as a DataFrame with added ``Language`` and ``Model`` columns.
	        """
	        df = pd.read_csv(self.DATA_PATH[lang][model])
	        df['Language'] = lang
	        df['Model'] = model
	        return df

	    def draw_specific_plots(self, data, categories, x_variable, title):
	        """Overlay one density curve per (category, group) pair on a single axis.

	        Args:
	            data: DataFrame holding the ``categories`` columns and ``x_variable``.
	            categories: names of the columns to plot.
	            x_variable: column whose unique values split ``data`` into groups.
	            title: axis title.

	        Returns:
	            The matplotlib figure.
	        """
	        fig, ax = plt.subplots(figsize=(12, 6))

	        groups = data[x_variable].unique()
	        # One colour per curve, consumed in drawing order.
	        colors = iter(sns.color_palette("pastel", len(categories) * len(groups)))

	        for category in categories:
	            for group in groups:
	                subset = data[data[x_variable] == group]
	                sns.kdeplot(data=subset, x=category, fill=True, common_norm=False, alpha=0.5,
	                            ax=ax, color=next(colors),
	                            label=f"{category} ({group})")

	        ax.set_title(title, fontsize=16)
	        ax.set_xlabel("Score", fontsize=12)
	        ax.set_ylabel("Density", fontsize=12)
	        ax.legend(title="Category (Language/Model)")

	        return fig

	    def draw_combined_density_plot(self, data, title):
	        """Overlay the density of every ``scores_gpt`` column of ``data`` on one axis.

	        Returns:
	            The matplotlib figure.
	        """
	        fig, ax = plt.subplots(figsize=(12, 8))

	        palette = sns.color_palette("pastel", len(self.scores_gpt))

	        for category, color in zip(self.scores_gpt, palette):
	            sns.kdeplot(data=data, x=category, fill=True, common_norm=False, alpha=0.5,
	                        ax=ax, label=category, color=color)

	        ax.set_title(title, fontsize=16)
	        ax.set_xlabel("Score", fontsize=12)
	        ax.set_ylabel("Density", fontsize=12)
	        ax.legend(title="Score Categories")

	        return fig

	    def draw_bar_plot(self, data, categories, x_variable, title):
	        """Draw one count plot per category, with bars split by ``x_variable``.

	        Args:
	            data: DataFrame holding the ``categories`` columns and ``x_variable``.
	            categories: non-empty list of column names, one subplot each.
	            x_variable: column used as the hue of the bars.
	            title: text appended to every subplot title.

	        Returns:
	            The matplotlib figure.
	        """
	        # squeeze=False always yields a 2-D array of axes, so a single
	        # category needs no special case.
	        fig, axs = plt.subplots(len(categories), 1, figsize=(10, 6 * len(categories)),
	                                squeeze=False)

	        # One colour per hue level, not one per row of data.
	        palette = sns.color_palette("pastel", data[x_variable].nunique())

	        for ax, category in zip(axs[:, 0], categories):
	            sns.countplot(data=data, x=category, hue=x_variable, palette=palette, ax=ax)
	            ax.set_title(f"Distribution of {category} for {title}", fontsize=16)
	            ax.set_xlabel(category, fontsize=12)
	            ax.set_ylabel("Count", fontsize=12)
	            ax.legend(title=x_variable)

	        fig.tight_layout()
	        return fig

	    @staticmethod
	    def _render(fig):
	        """Show ``fig`` on the page, then close it so reruns do not pile up open figures."""
	        st.pyplot(fig)
	        plt.close(fig)

	    def _render_comparisons(self, draw, data, categories, languages, models, title_prefix=""):
	        """Render ``draw`` per language (split by model) and, when several
	        languages are selected, per model (split by language)."""
	        for language in languages:
	            subset = data[data['Language'] == language]
	            self._render(draw(subset, categories, 'Model', f"{title_prefix}{language}"))
	        if len(languages) > 1:
	            for model in models:
	                subset = data[data['Model'] == model]
	                self._render(draw(subset, categories, 'Language', f"{title_prefix}{model}"))

	    def score_analyzer(self):
	        """Build the sidebar controls and render the charts they select."""
	        st.sidebar.markdown("""
	        This App was created as a part of the project: "Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi"
	        """)
	        st.title("Findings from Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi!")
	        st.markdown("""
	        Full post here:
	        """)
	        show_samples = st.sidebar.checkbox("Show Sample Data", value=False)
	        detailed_view = st.sidebar.checkbox("Enable Detailed Charts View", value=False)

	        selected_languages = st.sidebar.multiselect("Select Languages", self.languages, default="Nepali")
	        selected_gpt_scoring = st.sidebar.multiselect("Select Score Category", self.scores_gpt, default="relevance_score")
	        selected_models = st.sidebar.multiselect("Select Models", self.models, default="Baseline")

	        dfs = [
	            self.load_data(lang, model)
	            for lang in selected_languages
	            for model in selected_models
	        ]
	        if show_samples:
	            for lang in selected_languages:
	                st.write(f"Sample data for {lang}")
	                self.load_samples(lang)

	        if not dfs:
	            # pd.concat raises ValueError on an empty list.
	            st.warning("Select at least one language and one model to plot.")
	            return

	        combined_data = pd.concat(dfs, ignore_index=True)

	        # NOTE(review): pairing of the else branch with ``detailed_view`` is
	        # reconstructed -- confirm.
	        if not detailed_view:
	            for language in selected_languages:
	                for model in selected_models:
	                    title = f"Distribution of Scores for Different Evaluation Criterias for {language} [{model} Model]"
	                    in_scope = (combined_data['Language'] == language) & (combined_data['Model'] == model)
	                    self._render(self.draw_combined_density_plot(combined_data[in_scope], title))
	            return

	        self._render_comparisons(self.draw_specific_plots, combined_data, selected_gpt_scoring,
	                                 selected_languages, selected_models,
	                                 title_prefix="Distribution of Scores for ")

	        st.sidebar.markdown("""
	        Show additional evaluation scores and categories below:
	        """)
	        additional_score_categories = st.sidebar.checkbox("Hallucination Statistics", value=False)
	        if additional_score_categories:
	            additional_categories = st.sidebar.multiselect("Select Category", self.categories, default="hallucination_type")
	            # plt.subplots cannot lay out zero rows, so skip when nothing is chosen.
	            if additional_categories:
	                self._render_comparisons(self.draw_bar_plot, combined_data, additional_categories,
	                                         selected_languages, selected_models)




	def main():
	    """Render the sidebar heading, then let the score analyzer build the page."""
	    st.sidebar.header("Findings from Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi!")
	    LLaMAScoreAnalyzer().score_analyzer()


	# Build the page only when this file is run as the entry script.
	if __name__ == "__main__":
	    main()