shreeyad committed on
Commit
ab7200a
·
0 Parent(s):

add streamlit app

Browse files
.streamlit/config.toml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [theme]
2
+ base="light"
3
+ primaryColor="#1d5965"
4
+ textColor="#1d5965"
5
+
6
+
README.md ADDED
File without changes
app.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ import pandas as pd
6
+
7
+
8
class LLaMAScoreAnalyzer:
    """Streamlit dashboard comparing evaluation scores per language and model.

    Score CSV files are looked up through ``DATA_PATH`` (language -> model ->
    path) and rendered as density plots on the Streamlit page.
    """

    # Maximum number of rows displayed per sample table.
    SAMPLE_SIZE = 5

    def __init__(self):
        self.languages = ["Nepali", "Hindi"]
        self.models = ["Baseline", "LoRA"]
        self.scores_gpt = ["relevance_score", "cc_score", "syntax_score", "complete_score"]
        self.rouge_bleu = ["rougeL", "bleu"]
        self.categories = ["hallucination_type", "is_repeat"]
        # Each (language, model) pair maps to its own score file so that data
        # is never displayed under the wrong language/model label.
        # NOTE(review): confirm ./data/hindi_lora_all_scores.csv is shipped with
        # the app; a missing file is reported with a warning and skipped.
        self.DATA_PATH = {
            "Nepali": {
                "Baseline": "./data/nepali_baseline_all_scores.csv",
                "LoRA": "./data/nepali_lora_all_scores.csv",
            },
            "Hindi": {
                "Baseline": "./data/hindi_baseline_all_scores.csv",
                "LoRA": "./data/hindi_lora_all_scores.csv",
            },
        }

    def _sample_frame(self, lang, model):
        """Return the displayable columns of the score file for one model.

        The ``output`` column is renamed to ``expected_output`` and the
        ``cleaned_response`` column is exposed as ``<model>_Response``.
        Columns absent from the file are left out instead of raising.
        """
        df = pd.read_csv(self.DATA_PATH[lang][model])
        # rename() returns a new frame and needs ``columns=`` to target headers.
        df = df.rename(columns={"output": "expected_output"})
        response_col = f"{model}_Response"
        if "cleaned_response" in df.columns:
            df[response_col] = df["cleaned_response"]
        wanted = [
            "instruction",
            "input",
            "expected_output",
            response_col,
            *self.scores_gpt,
            *self.rouge_bleu,
            "is_repeat",
            "hallucination_type",
        ]
        return df[[col for col in wanted if col in df.columns]]

    def load_samples(self, lang):
        """Write a random sample table for every model configured for ``lang``.

        At most ``SAMPLE_SIZE`` rows are shown per model; files with fewer
        rows are shown in full. A missing score file produces a warning.
        """
        for model in self.DATA_PATH[lang]:
            try:
                frame = self._sample_frame(lang, model)
            except FileNotFoundError:
                st.warning(f"No score file found for {lang} / {model}; skipping it.")
                continue
            # sample(n) raises when n exceeds the number of rows, so cap it.
            st.write(frame.sample(min(self.SAMPLE_SIZE, len(frame))))

    def load_data(self, lang, model):
        """Read one score file and tag every row with its language and model.

        Returns the DataFrame with added ``Language`` and ``Model`` columns.
        """
        df = pd.read_csv(self.DATA_PATH[lang][model])
        df['Language'] = lang
        df['Model'] = model
        return df

    def _load_selected(self, languages, models):
        """Load every available (language, model) frame, warning on missing files."""
        frames = []
        for lang in languages:
            for model in models:
                try:
                    frames.append(self.load_data(lang, model))
                except FileNotFoundError:
                    st.warning(f"No score file found for {lang} / {model}; skipping it.")
        return frames

    def draw_specific_plots(self, data, categories, x_variable, title):
        """Draw one density curve per (category, group) pair on a shared axis.

        Args:
            data: DataFrame holding the category columns and ``x_variable``.
            categories: score column names to plot.
            x_variable: column whose distinct values define the groups.
            title: figure title.

        Returns:
            The matplotlib figure.
        """
        fig, ax = plt.subplots(figsize=(12, 6))

        # Computed once; also fixes the colour index of each (category, group).
        groups = data[x_variable].unique()
        palette = sns.color_palette("tab10", len(categories) * len(groups))

        for i, category in enumerate(categories):
            for j, group in enumerate(groups):
                subset = data[data[x_variable] == group]
                sns.kdeplot(data=subset, x=category, fill=True, common_norm=False, alpha=0.5,
                            ax=ax, color=palette[i * len(groups) + j],
                            label=f"{category} ({group})")

        ax.set_title(title, fontsize=16)
        # The x axis carries the score values, not the grouping variable.
        ax.set_xlabel("Score", fontsize=12)
        ax.set_ylabel("Density", fontsize=12)
        ax.legend(title="Category (Language/Model)")

        return fig

    def draw_combined_density_plot(self, data, title):
        """Draw the density of every column in ``scores_gpt`` on one axis.

        Returns the matplotlib figure.
        """
        fig, ax = plt.subplots(figsize=(12, 8))

        palette = sns.color_palette("tab10", len(self.scores_gpt))

        for i, category in enumerate(self.scores_gpt):
            sns.kdeplot(data=data, x=category, fill=True, common_norm=False, alpha=0.5,
                        ax=ax, label=category, color=palette[i])

        ax.set_title(title, fontsize=16)
        ax.set_xlabel("Score", fontsize=12)
        ax.set_ylabel("Density", fontsize=12)
        ax.legend(title="Score Categories")

        return fig

    def _render(self, fig):
        """Show a figure on the page, then close it.

        The figures are created through pyplot and nothing else closes them,
        so closing here keeps them from piling up over repeated calls.
        """
        st.pyplot(fig)
        plt.close(fig)

    def _plot_breakdowns(self, data, categories, languages, models):
        """Render per-language (grouped by model) and per-model (grouped by language) plots."""
        if not categories:
            return
        for language in languages:
            language_data = data[data['Language'] == language]
            if language_data.empty:
                continue
            title = f"Distribution of Scores Across Models for {language}"
            self._render(self.draw_specific_plots(language_data, categories, 'Model', title))
        for model in models:
            model_data = data[data['Model'] == model]
            if model_data.empty:
                continue
            title = f"Distribution of Scores Across Languages for {model}"
            self._render(self.draw_specific_plots(model_data, categories, 'Language', title))

    def score_analyzer(self):
        """Build the sidebar controls and render the selected charts."""
        st.sidebar.markdown(
            'This App was created as a part of the project: '
            '"Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi"'
        )
        st.title("Findings from Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi!")
        st.markdown("Full post here:")
        show_samples = st.sidebar.checkbox("Show Sample Data", value=False)
        detailed_view = st.sidebar.checkbox("Enable Detailed Charts View", value=False)

        selected_languages = st.sidebar.multiselect("Select Languages", self.languages, default="Nepali")
        selected_gpt_scoring = st.sidebar.multiselect("Select Score Category", self.scores_gpt, default="relevance_score")
        selected_models = st.sidebar.multiselect("Select Models", self.models, default="Baseline")

        frames = self._load_selected(selected_languages, selected_models)
        if show_samples:
            for lang in selected_languages:
                st.write(f"Sample data for {lang}")
                self.load_samples(lang)

        # pd.concat raises on an empty list (nothing selected or nothing found).
        if not frames:
            st.warning("Select at least one language and one model with available data to display charts.")
            return
        combined_data = pd.concat(frames, ignore_index=True)

        # NOTE(review): the source's indentation was lost; the detailed branch is
        # assumed to contain the additional-score controls, with the combined
        # overview as its alternative. Confirm against the original layout.
        if detailed_view:
            self._plot_breakdowns(combined_data, selected_gpt_scoring, selected_languages, selected_models)

            st.sidebar.markdown("Show additional evaluation scores and categories below:")
            if st.sidebar.checkbox("Hallucination and Instruction Repeat Statistics", value=False):
                # TODO: no chart is drawn for these categorical columns yet;
                # only the selector is displayed.
                st.sidebar.multiselect("Select Category", self.categories, default="hallucination_type")

            if st.sidebar.checkbox("Rouge and BLEU Scores", value=False):
                rouge_bleu_scores = st.sidebar.multiselect("Select Category", self.rouge_bleu, default="rougeL")
                self._plot_breakdowns(combined_data, rouge_bleu_scores, selected_languages, selected_models)
        else:
            for language in selected_languages:
                for model in selected_models:
                    pair_data = combined_data[(combined_data['Language'] == language) &
                                              (combined_data['Model'] == model)]
                    if pair_data.empty:
                        continue
                    title = f"Combined Density of All Categories for {language} - {model}"
                    self._render(self.draw_combined_density_plot(pair_data, title))
150
+
151
+
152
+
153
+
154
def main():
    """Draw the sidebar header, then run the score analyzer dashboard."""
    st.sidebar.header("Findings from Fine-tuning LLaMA 3 with Low-Rank Adaptation for Nepali and Hindi!")
    LLaMAScoreAnalyzer().score_analyzer()


if __name__ == "__main__":
    main()
165
+
data/hindi_baseline_all_scores.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/nepali_baseline_all_scores.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/nepali_lora_all_scores.csv ADDED
The diff for this file is too large to render. See raw diff