# unlp / app.py
# woters's picture
# label update
# fbeaf3c
# raw
# history blame
# 10.6 kB
import gradio as gr
import pandas as pd
import random
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
from trueskill import Rating
import trueskill
# Path of the question/answer CSV read by list_models() and list_questions().
CSV_FILE_PATH = "qa_pairs.csv"
# Load the service-account key file and initialise the default Firebase app
# with it; this must run before firestore.client() is called further down.
cred = credentials.Certificate("unlpboard_f.json")
firebase_admin.initialize_app(cred)
def list_models():
    """Return the distinct values of the ``model`` column of the CSV file.

    The file at ``CSV_FILE_PATH`` is re-read on every call. Values are
    returned as a plain list in order of first appearance.
    """
    frame = pd.read_csv(CSV_FILE_PATH)
    distinct_models = frame['model'].unique()
    return distinct_models.tolist()
def list_questions():
    """Return the distinct values of the ``question`` column of the CSV file.

    The file at ``CSV_FILE_PATH`` is re-read on every call. Values are
    returned as a plain list in order of first appearance.
    """
    frame = pd.read_csv(CSV_FILE_PATH)
    distinct_questions = frame['question'].unique()
    return distinct_questions.tolist()
def fetch_questions():
    """Return every document of the ``questions`` Firestore collection.

    Each document is converted with ``to_dict()``; the result is a list of
    those dicts in the order the collection stream yields them.
    """
    return [snapshot.to_dict() for snapshot in db.collection('questions').stream()]
def display_answers(question, model1, model2, df):
    """Look up both models' answers to ``question`` in ``df``.

    Args:
        question: Value matched against the ``question`` column.
        model1: Name matched against the ``model`` column for the first slot.
        model2: Name matched against the ``model`` column for the second slot.
        df: DataFrame with ``question``, ``model`` and ``answer`` columns.

    Returns:
        A ``(text_for_model1, text_for_model2)`` tuple. Each text is the
        first matching answer prefixed with a bold "Answer:" heading, or a
        "No answer available" placeholder when no row matches.
    """
    rendered = {
        model1: "No answer available for Model 1",
        model2: "No answer available for Model 2",
    }
    for name in (model1, model2):
        matches = df[(df['question'] == question) & (df['model'] == name)]
        if matches.empty:
            # Keep the placeholder text for this model.
            continue
        first_answer = matches['answer'].iloc[0]
        rendered[name] = f"**Answer:**\n{first_answer}"
    return rendered[model1], rendered[model2]
def update_b(q, m1, a1, m2, a2):
    """Handle the START button: draw the first matchup and enable voting.

    Args:
        q, m1, a1, m2, a2: Current values of the question, model-name and
            answer components; they are forwarded to ``update_symbols``.

    Returns:
        A 9-tuple ``(q, m1, a1, m2, a2, b1, b2, b3, b4)``: the five
        refreshed Markdown components, the three vote buttons made
        interactive, and the START button hidden.
    """
    # These are the module-level start-up placeholders, not the pair drawn
    # below. The first line previously printed random_model2 by mistake.
    print('Model1: ', random_model1)
    print('Model2: ', random_model2)
    q, m1, a1, m2, a2 = update_symbols(q, m1, a1, m2, a2)
    b1 = gr.Button("Vote for Model 1", interactive=True)
    b2 = gr.Button("It’s a tie!", interactive=True)
    b3 = gr.Button("Vote for Model 2", interactive=True)
    # Once a round has started the START button is no longer needed.
    b4 = gr.Button("START!", visible=False)
    return q, m1, a1, m2, a2, b1, b2, b3, b4
def update_symbols1(q, m1, a1, m2, a2):
    """Record a vote for Model 1 and return the next matchup.

    The vote is appended to the vote log, then the ``votes`` documents of
    both models are updated: the winner's ``win_count`` and the loser's
    ``loss_count`` are incremented and both ``elo_rating`` fields are
    replaced with the TrueSkill means after a 1-vs-1 win for ``m1``.

    Args:
        q: Text of the question that was shown.
        m1: Name of the first model (the winner).
        a1: Answer text shown for the first model.
        m2: Name of the second model (the loser).
        a2: Answer text shown for the second model.

    Returns:
        The 5-tuple produced by ``update_symbols`` for the next round.
    """
    print("Voted for Model 1")
    log_vote(
        model1=m1,
        model2=m2,
        question=q,
        output1=a1,
        output2=a2,
        outcome=m1,
    )

    votes_ref = db.collection('votes')
    winner_ref = votes_ref.document(m1)
    loser_ref = votes_ref.document(m2)

    # to_dict() is None for a missing document and the field may be absent;
    # Rating(None) then falls back to the library's default mean.
    winner_mu = (winner_ref.get().to_dict() or {}).get('elo_rating')
    loser_mu = (loser_ref.get().to_dict() or {}).get('elo_rating')

    # Only the mean is persisted, so each Rating starts from the default sigma.
    # rate_1vs1 takes (winner, loser) and returns the new ratings in that order.
    winner_rating, loser_rating = trueskill.rate_1vs1(Rating(winner_mu), Rating(loser_mu))

    # set(..., merge=True) creates the document when it is missing and leaves
    # unrelated fields intact, so no separate exists/update/set branching is
    # needed and no field can be wiped by a plain set().
    winner_ref.set(
        {'win_count': firestore.Increment(1), 'elo_rating': winner_rating.mu},
        merge=True,
    )
    loser_ref.set(
        {'loss_count': firestore.Increment(1), 'elo_rating': loser_rating.mu},
        merge=True,
    )
    return update_symbols(q, m1, a1, m2, a2)
def update_symbols2(q, m1, a1, m2, a2):
    """Record a tie between the two models and return the next matchup.

    Only the vote log is written (with outcome ``'tie'``); the per-model
    counters and ratings in the ``votes`` collection are not touched.

    Returns:
        The 5-tuple produced by ``update_symbols`` for the next round.
    """
    print("Voted for Spare")
    vote_record = dict(
        model1=m1,
        model2=m2,
        question=q,
        output1=a1,
        output2=a2,
        outcome='tie',
    )
    log_vote(**vote_record)
    next_round = update_symbols(q, m1, a1, m2, a2)
    return next_round
def update_symbols3(q, m1, a1, m2, a2):
    """Record a vote for Model 2 and return the next matchup.

    The vote is appended to the vote log, then the ``votes`` documents of
    both models are updated: the winner's ``win_count`` and the loser's
    ``loss_count`` are incremented and both ``elo_rating`` fields are
    replaced with the TrueSkill means after a 1-vs-1 win for ``m2``.

    Args:
        q: Text of the question that was shown.
        m1: Name of the first model (the loser).
        a1: Answer text shown for the first model.
        m2: Name of the second model (the winner).
        a2: Answer text shown for the second model.

    Returns:
        The 5-tuple produced by ``update_symbols`` for the next round.
    """
    print("Voted for Model 2")
    log_vote(
        model1=m1,
        model2=m2,
        question=q,
        output1=a1,
        output2=a2,
        outcome=m2,
    )

    votes_ref = db.collection('votes')
    winner_ref = votes_ref.document(m2)
    loser_ref = votes_ref.document(m1)

    # to_dict() is None for a missing document and the field may be absent;
    # Rating(None) then falls back to the library's default mean.
    winner_mu = (winner_ref.get().to_dict() or {}).get('elo_rating')
    loser_mu = (loser_ref.get().to_dict() or {}).get('elo_rating')

    # rate_1vs1 takes (winner, loser) and returns the new ratings in that
    # same order. The result was previously unpacked as (model 1, model 2),
    # which stored the winner's new rating on the loser and vice versa.
    winner_rating, loser_rating = trueskill.rate_1vs1(Rating(winner_mu), Rating(loser_mu))

    # set(..., merge=True) creates the document when it is missing and leaves
    # unrelated fields intact, so no separate exists/update/set branching is
    # needed and no field can be wiped by a plain set().
    winner_ref.set(
        {'win_count': firestore.Increment(1), 'elo_rating': winner_rating.mu},
        merge=True,
    )
    loser_ref.set(
        {'loss_count': firestore.Increment(1), 'elo_rating': loser_rating.mu},
        merge=True,
    )
    return update_symbols(q, m1, a1, m2, a2)
def update_symbols(q, m1, a1, m2, a2):
    """Draw a new random question and two distinct models to compare.

    The incoming component values are not read; they exist so the function
    can be wired with the same inputs as the vote handlers.

    Returns:
        ``(q, m1, a1, m2, a2)`` as fresh Markdown components: the question,
        the hidden name and the answer of the first model, then the hidden
        name and the answer of the second model.
    """
    next_question = random.choice(questions)
    left_model, right_model = random.sample(models, 2)
    left_answer, right_answer = display_answers(
        next_question, left_model, right_model, combined_df
    )
    return (
        gr.Markdown(f"{next_question}"),
        # Model names travel with the round but stay hidden from the voter.
        gr.Markdown(f"{left_model}", visible=False),
        gr.Markdown(left_answer),
        gr.Markdown(f"{right_model}", visible=False),
        gr.Markdown(right_answer),
    )
def update_total_votes():
    """Increment the ``count`` field of the ``votes/total`` document.

    The document is created with ``count`` set to 1 when it does not exist.
    """
    total_ref = db.collection('votes').document('total')
    if not total_ref.get().exists:
        total_ref.set({'count': 1})
        return
    total_ref.update({'count': firestore.Increment(1)})
def log_vote(model1, model2, question, output1, output2, outcome):
    """Append one vote record to the ``votes_log`` Firestore collection.

    Args:
        model1: Name of the first model shown.
        model2: Name of the second model shown.
        question: Question text that was shown.
        output1: Answer text shown for the first model.
        output2: Answer text shown for the second model.
        outcome: Value stored as the result of the vote (the callers pass
            the winning model's name or ``'tie'``).
    """
    db.collection('votes_log').add({
        'model1': model1,
        'model2': model2,
        'question': question,
        'output1': output1,
        'output2': output2,
        'outcome': outcome,
        # Filled in by Firestore when the document is written.
        'timestamp': firestore.SERVER_TIMESTAMP,
    })
def fetch_and_format_leaderboard():
    """Build the leaderboard table from the ``votes`` Firestore collection.

    Every document except the aggregate ``total`` counter is treated as one
    model. Missing ``win_count``, ``loss_count`` and ``elo_rating`` fields
    default to 0, and the win rate is 0 for a model with no decided matches.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Rank``, ``model``,
        ``win_rate`` (percentage) and ``TrueSkill rating``, sorted by
        ``win_rate`` in descending order with ``Rank`` running from 1 to
        the number of rows. The frame is empty, with the same columns,
        when there are no model documents.
    """
    leaderboard = []
    for doc in db.collection('votes').stream():
        # update_total_votes() keeps a vote counter in votes/total; it is
        # not a model and must not appear as a leaderboard row.
        if doc.id == 'total':
            continue
        model_data = doc.to_dict()
        win_count = model_data.get('win_count', 0)
        loss_count = model_data.get('loss_count', 0)
        total_matches = win_count + loss_count
        win_rate = (win_count / total_matches) * 100 if total_matches > 0 else 0
        leaderboard.append({
            "model": doc.id,
            "win_rate": win_rate,
            "TrueSkill rating": model_data.get('elo_rating', 0),
        })

    # Rows are ordered by win rate, highest first.
    leaderboard.sort(key=lambda row: row['win_rate'], reverse=True)

    # Explicit columns keep the frame well-formed even with zero rows.
    leaderboard_df = pd.DataFrame(
        leaderboard, columns=['model', 'win_rate', 'TrueSkill rating']
    )
    # Rank follows the sorted order and adapts to however many models exist
    # (it used to be a fixed six-element list).
    leaderboard_df.insert(0, 'Rank', list(range(1, len(leaderboard_df) + 1)))
    return leaderboard_df
#questions = list_questions()
# Module-level Firestore client; the functions in this file reach it through
# the global name `db`.
db = firestore.client()
def fetch_questions_c(collection):
    """Return every document of the named Firestore collection as a dict.

    Args:
        collection: Name of the Firestore collection to read.

    Returns:
        A list with one ``to_dict()`` result per document, in the order the
        collection stream yields them.
    """
    return [snapshot.to_dict() for snapshot in db.collection(collection).stream()]
# Load each model's stored rows from the Firestore collection named after it.
(codekobzar, gpt, llama, sherlocknorag, sherlockrag, ukrainenow) = [
    fetch_questions_c(collection_name)
    for collection_name in (
        'codekobzar',
        'gpt-4',
        'llama-2-70b-chat',
        'sherlock-no-rag',
        'sherlock-rag',
        'ukrainenow',
    )
]

# One frame per model, each tagged with the model name it came from.
df1 = pd.DataFrame(codekobzar).assign(model='codekobzar')
df2 = pd.DataFrame(gpt).assign(model='gpt-4')
df3 = pd.DataFrame(llama).assign(model='llama-2-70b-chat')
df4 = pd.DataFrame(sherlocknorag).assign(model='sherlock-no-rag')
df5 = pd.DataFrame(sherlockrag).assign(model='sherlock-rag')
df6 = pd.DataFrame(ukrainenow).assign(model='ukrainenow')

# Stack all models into a single table, drop the 'input' column and rename
# the remaining columns to the question/answer names display_answers() uses.
combined_df = (
    pd.concat([df1, df2, df3, df4, df5, df6], ignore_index=True)
    .drop('input', axis=1)
    .rename(columns={'instruction': 'question', 'output': 'answer'})
)
# Names of the models that can be drawn for a matchup; each one has a
# document of the same name in the 'votes' collection.
models = ['codekobzar', 'gpt-4', 'llama-2-70b-chat', 'sherlock-no-rag', 'sherlock-rag', 'ukrainenow']  # list_models()
votes_ref = db.collection('votes')
# Make sure every model has a votes document before the first vote arrives.
for model in models:
    vote_doc = votes_ref.document(model).get()
    if vote_doc.exists:
        print("-------")
    else:
        # Write all three fields in a single call: set() without merge
        # replaces the whole document, so three separate calls would leave
        # only the last field ('elo_rating') behind.
        votes_ref.document(model).set({
            'win_count': 0,
            'loss_count': 0,
            'elo_rating': 25,
        })
# Placeholder round shown before the user presses START.
random_model1 = '1'
random_model2 = '2'
random_question = 'Click any button to start!'
answer1, answer2 = display_answers(random_question, random_model1, random_model2, combined_df)

# Question texts that update_symbols() draws from.
questions_ = fetch_questions()
questions = [entry['question_text'] for entry in questions_]

votes_ref = db.collection('votes')
def create_app():
    """Build the Gradio Blocks interface and return it without launching.

    The page shows the question, the two answers side by side (with the
    model names kept in hidden Markdown components), three vote buttons
    that start disabled, a START button, the leaderboard table and a
    button that refreshes it.

    Returns:
        The ``gr.Blocks`` application.
    """
    # NOTE(review): the nesting of the layout blocks below was reconstructed
    # from a copy of the file without indentation; confirm it matches the
    # deployed layout.
    for debug_line in (
        '-----------------------',
        random_question,
        random_model1,
        '-----!!!!!!!!!!!!!',
    ):
        print(debug_line)

    with gr.Blocks() as app:
        q = gr.Markdown(f"### Question: {random_question}")
        with gr.Row():
            with gr.Column():
                m1 = gr.Markdown(f"{random_model1}", visible=False)
                a1 = gr.Markdown(answer1)
            with gr.Column():
                m2 = gr.Markdown(f"{random_model2}", visible=False)
                a2 = gr.Markdown(answer2)
        with gr.Row():
            # Voting stays disabled until START has drawn a real matchup.
            b1 = gr.Button("Vote for Model 1", interactive=False)
            b2 = gr.Button("It’s a tie!", interactive=False)
            b3 = gr.Button("Vote for Model 2", interactive=False)
        with gr.Row():
            b4 = gr.Button("START!", interactive=True)

        leaderboard_display = gr.Dataframe(
            value=fetch_and_format_leaderboard(),
            label="Leaderboard",
        )

        # Components that every round handler both reads and refreshes.
        round_components = [q, m1, a1, m2, a2]
        b4.click(
            update_b,
            inputs=round_components,
            outputs=round_components + [b1, b2, b3, b4],
        )
        vote_handlers = (
            (b1, update_symbols1),
            (b2, update_symbols2),
            (b3, update_symbols3),
        )
        for vote_button, handler in vote_handlers:
            vote_button.click(handler, inputs=round_components, outputs=round_components)

        leaderboard_button = gr.Button("Refresh Leaderboard")
        leaderboard_button.click(
            fn=fetch_and_format_leaderboard,
            inputs=[],
            outputs=leaderboard_display,
        )
    return app
# Build the Gradio interface and start serving it.
app = create_app()
app.launch()