# Hugging Face Space: woters/unlp (status: Paused). File size: 10,552 bytes.

import gradio as gr
import pandas as pd
import random
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
from trueskill import Rating
import trueskill

# Local CSV of question/answer pairs; read by list_models() and list_questions().
CSV_FILE_PATH = "qa_pairs.csv"

# Initialise the default Firebase app from the service-account key file.
# The Firestore client created further down in this module uses this app.
cred = credentials.Certificate("unlpboard_f.json")
firebase_admin.initialize_app(cred)

def list_models():
    """Return the distinct values of the ``model`` column of the QA CSV.

    The file at ``CSV_FILE_PATH`` is re-read on every call; values are
    returned as a plain list in order of first appearance.
    """
    model_column = pd.read_csv(CSV_FILE_PATH)['model']
    return model_column.unique().tolist()


def list_questions():
    """Return the distinct values of the ``question`` column of the QA CSV.

    The file at ``CSV_FILE_PATH`` is re-read on every call; values are
    returned as a plain list in order of first appearance.
    """
    question_column = pd.read_csv(CSV_FILE_PATH)['question']
    return question_column.unique().tolist()

def fetch_questions():
    """Return every document of the ``questions`` collection as a list of dicts.

    Each streamed document snapshot is converted with ``to_dict()``; the
    order is whatever order the stream yields.
    """
    return [snapshot.to_dict() for snapshot in db.collection('questions').stream()]


def display_answers(question, model1, model2, df):
    """Look up the answers of two models to one question.

    Args:
        question: Value matched against the ``question`` column of ``df``.
        model1: Name matched against the ``model`` column for the first slot.
        model2: Name matched against the ``model`` column for the second slot.
        df: DataFrame with ``question``, ``model`` and ``answer`` columns.

    Returns:
        A ``(text1, text2)`` tuple. Each text is the first matching answer
        prefixed with a bold "Answer:" heading, or a "No answer available"
        placeholder when ``df`` has no matching row.
    """
    # Results are keyed by model name, so passing the same name twice
    # shares a single entry between both slots.
    texts = dict([
        (model1, "No answer available for Model 1"),
        (model2, "No answer available for Model 2"),
    ])
    same_question = df['question'] == question
    for name in (model1, model2):
        hits = df.loc[same_question & (df['model'] == name), 'answer']
        if len(hits) > 0:
            texts[name] = f"**Answer:**\n{hits.iloc[0]}"
    return texts[model1], texts[model2]


def update_b(q,m1,a1,m2,a2):
    """Handle the START! button: show a first comparison and enable voting.

    Args:
        q, m1, a1, m2, a2: Current values of the question, model-name and
            answer components; forwarded to ``update_symbols``.

    Returns:
        A 9-tuple: the five refreshed display components from
        ``update_symbols`` followed by the three vote buttons (now
        interactive) and the START! button (now hidden).
    """
    # Debug output of the module-level placeholders. The original printed
    # random_model2 under the "Model1" label as well.
    print('Model1: ', random_model1)
    print('Model2: ', random_model2)
    q, m1, a1, m2, a2 = update_symbols(q, m1, a1, m2, a2)
    b1 = gr.Button("Vote for Model 1",interactive=True)
    b2 = gr.Button("It’s a tie!",interactive=True)
    b3 = gr.Button("Vote for Model 2",interactive=True)
    b4 = gr.Button("START!", visible = False)
    return q, m1, a1, m2, a2, b1, b2, b3, b4


def update_symbols1(q,m1,a1,m2,a2):
    """Record a vote for Model 1 and return the next comparison.

    Logs the vote, re-rates both models with TrueSkill (``m1`` as winner,
    ``m2`` as loser), bumps the winner's ``win_count`` and the loser's
    ``loss_count``, then draws a fresh question/model pair.

    Args:
        q: Question text that was displayed.
        m1: Name of the model shown in the first slot (the winner).
        a1: Answer text shown for ``m1``.
        m2: Name of the model shown in the second slot (the loser).
        a2: Answer text shown for ``m2``.

    Returns:
        The 5-tuple produced by ``update_symbols``.
    """
    print("Voted for Model 1")
    log_vote(
        model1=m1,
        model2=m2,
        question=q,
        output1=a1,
        output2=a2,
        outcome=m1
    )
    votes_ref = db.collection('votes')
    winner_ref = votes_ref.document(m1)
    loser_ref = votes_ref.document(m2)

    # to_dict() is None for a missing document; a missing rating becomes
    # Rating(None), which uses the TrueSkill environment's default mu.
    # NOTE(review): only mu is persisted, so sigma restarts at the default
    # on every vote -- confirm whether that is intended.
    winner_data = winner_ref.get().to_dict() or {}
    loser_data = loser_ref.get().to_dict() or {}
    elo1 = Rating(winner_data.get('elo_rating'))
    elo2 = Rating(loser_data.get('elo_rating'))

    # rate_1vs1 takes (winner, loser) and returns them in the same order.
    elo1, elo2 = trueskill.rate_1vs1(elo1, elo2)

    # One merged write per model: creates the document when it is missing
    # and never discards fields that are not part of this write. The
    # previous update()/set() sequence raised on a missing document and
    # could overwrite the rating that had just been stored.
    winner_ref.set(
        {'win_count': firestore.Increment(1), 'elo_rating': elo1.mu},
        merge=True,
    )
    loser_ref.set(
        {'loss_count': firestore.Increment(1), 'elo_rating': elo2.mu},
        merge=True,
    )

    return update_symbols(q, m1, a1, m2, a2)


def update_symbols2(q, m1, a1, m2, a2):
    """Record a tie between the two displayed models and return the next comparison.

    Only the vote log is written; win/loss counters and ratings are left
    untouched.

    Args:
        q: Question text that was displayed.
        m1, m2: Names of the two models that were compared.
        a1, a2: Answer texts shown for ``m1`` and ``m2``.

    Returns:
        The 5-tuple produced by ``update_symbols``.
    """
    print("Voted for Spare")
    vote = dict(
        model1=m1,
        model2=m2,
        question=q,
        output1=a1,
        output2=a2,
        outcome='tie',
    )
    log_vote(**vote)
    next_round = update_symbols(q, m1, a1, m2, a2)
    return next_round

def update_symbols3(q, m1, a1, m2, a2):
    """Record a vote for Model 2 and return the next comparison.

    Logs the vote, re-rates both models with TrueSkill (``m2`` as winner,
    ``m1`` as loser), bumps the winner's ``win_count`` and the loser's
    ``loss_count``, then draws a fresh question/model pair.

    Args:
        q: Question text that was displayed.
        m1: Name of the model shown in the first slot (the loser).
        a1: Answer text shown for ``m1``.
        m2: Name of the model shown in the second slot (the winner).
        a2: Answer text shown for ``m2``.

    Returns:
        The 5-tuple produced by ``update_symbols``.
    """
    print("Voted for Model 2")
    log_vote(
        model1=m1,
        model2=m2,
        question=q,
        output1=a1,
        output2=a2,
        outcome=m2
    )
    votes_ref = db.collection('votes')
    winner_ref = votes_ref.document(m2)
    loser_ref = votes_ref.document(m1)

    # to_dict() is None for a missing document; a missing rating becomes
    # Rating(None), which uses the TrueSkill environment's default mu.
    # NOTE(review): only mu is persisted, so sigma restarts at the default
    # on every vote -- confirm whether that is intended.
    winner_data = winner_ref.get().to_dict() or {}
    loser_data = loser_ref.get().to_dict() or {}
    elo2 = Rating(winner_data.get('elo_rating'))
    elo1 = Rating(loser_data.get('elo_rating'))

    # rate_1vs1 takes (winner, loser) and returns them in the same order,
    # so the result is unpacked as (elo2, elo1). The previous unpacking
    # order was swapped, storing the loser's new rating on the winner.
    elo2, elo1 = trueskill.rate_1vs1(elo2, elo1)

    # One merged write per model: creates the document when it is missing
    # and never discards fields that are not part of this write.
    winner_ref.set(
        {'win_count': firestore.Increment(1), 'elo_rating': elo2.mu},
        merge=True,
    )
    loser_ref.set(
        {'loss_count': firestore.Increment(1), 'elo_rating': elo1.mu},
        merge=True,
    )
    return update_symbols(q, m1, a1, m2, a2)

def update_symbols(q,m1,a1,m2,a2):
    """Draw a new random comparison and build the components that show it.

    The incoming values are not read; the parameters exist so the function
    can be wired to the same inputs as the vote handlers.

    Returns:
        ``(question, model1, answer1, model2, answer2)`` as ``gr.Markdown``
        components. The two model-name components are created hidden.
    """
    picked_question = random.choice(questions)
    left_model, right_model = random.sample(models, 2)
    left_answer, right_answer = display_answers(
        picked_question, left_model, right_model, combined_df
    )
    return (
        gr.Markdown(f"{picked_question}"),
        gr.Markdown(f"{left_model}", visible=False),
        gr.Markdown(left_answer),
        gr.Markdown(f"{right_model}", visible=False),
        gr.Markdown(right_answer),
    )

def update_total_votes():
    """Add one to the ``count`` field of the ``votes/total`` document.

    A merged ``set`` with a server-side increment creates the document when
    it does not exist yet and otherwise bumps the counter, in one write.
    This replaces the earlier read-then-write sequence, which needed an
    extra round trip and could race with a concurrent vote.
    """
    total_ref = db.collection('votes').document('total')
    total_ref.set({'count': firestore.Increment(1)}, merge=True)

def log_vote(model1, model2, question, output1, output2, outcome):
    """Append one vote record to the ``votes_log`` Firestore collection.

    Args:
        model1, model2: Names of the two models that were compared.
        question: Question text shown to the voter.
        output1, output2: Answer texts shown for ``model1`` and ``model2``.
        outcome: Value stored as the vote result (callers pass the winning
            model's name or ``'tie'``).
    """
    field_names = ('model1', 'model2', 'question', 'output1', 'output2', 'outcome')
    field_values = (model1, model2, question, output1, output2, outcome)
    record = dict(zip(field_names, field_values))
    # Sentinel resolved by Firestore to the server's write time.
    record['timestamp'] = firestore.SERVER_TIMESTAMP

    # add() stores the record as a new document with a generated id.
    db.collection('votes_log').add(record)


def fetch_and_format_leaderboard():
    """Build the leaderboard table from the ``votes`` Firestore collection.

    Every document in ``votes`` becomes one row. Missing ``win_count``,
    ``loss_count`` or ``elo_rating`` fields count as 0.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Rank``, ``model``,
        ``win_rate`` (percentage of wins among wins + losses) and
        ``TrueSkill rating``, sorted by ``win_rate`` in descending order.
        The frame has these columns even when the collection is empty.
    """
    leaderboard = []
    for doc in db.collection('votes').stream():
        model_data = doc.to_dict()
        win_count = model_data.get('win_count', 0)
        loss_count = model_data.get('loss_count', 0)
        total_matches = win_count + loss_count
        # Models with no decided match get a 0% win rate, not a ZeroDivisionError.
        win_rate = (win_count / total_matches) * 100 if total_matches > 0 else 0

        leaderboard.append({
            "model": doc.id,
            "win_rate": win_rate,
            "TrueSkill rating": model_data.get('elo_rating', 0),
        })

    # Sort the leaderboard by win rate in descending order.
    leaderboard.sort(key=lambda row: row['win_rate'], reverse=True)

    # Explicit columns keep the frame well-formed when there are no rows.
    leaderboard_df = pd.DataFrame(
        leaderboard, columns=['model', 'win_rate', 'TrueSkill rating']
    )
    # Rank follows the sorted row order. It is derived from the row count so
    # the table works for any number of documents, not only six.
    leaderboard_df['Rank'] = list(range(1, len(leaderboard_df) + 1))

    # Gradio's Dataframe component takes the DataFrame directly.
    return leaderboard_df[['Rank', 'model', 'win_rate', 'TrueSkill rating']]

# CSV-based question loading is disabled; `questions` is filled from
# Firestore further down in this module.
#questions = list_questions()



# Firestore client used through the module-level name `db` by the functions
# in this file.
db = firestore.client()

def fetch_questions_c(collection):
    """Return every document of the named Firestore collection as a list of dicts.

    Args:
        collection: Name of the collection to read.

    Returns:
        One ``to_dict()`` result per streamed document, in stream order.
    """
    return [snapshot.to_dict() for snapshot in db.collection(collection).stream()]

# Download each model's documents from the Firestore collection named after it.
codekobzar = fetch_questions_c('codekobzar')
gpt = fetch_questions_c('gpt-4')
llama  = fetch_questions_c('llama-2-70b-chat')
sherlocknorag  = fetch_questions_c('sherlock-no-rag')
sherlockrag  = fetch_questions_c('sherlock-rag')
ukrainenow  = fetch_questions_c('ukrainenow')

# One DataFrame per model, tagged with the model name so rows stay
# attributable after concatenation.
df1 = pd.DataFrame(codekobzar)
df2 = pd.DataFrame(gpt)
df3 = pd.DataFrame(llama)
df4 = pd.DataFrame(sherlocknorag)
df5 = pd.DataFrame(sherlockrag)
df6 = pd.DataFrame(ukrainenow)
df1['model'] = 'codekobzar'
df2['model'] = 'gpt-4'
df3['model'] = 'llama-2-70b-chat'
df4['model'] = 'sherlock-no-rag'
df5['model'] = 'sherlock-rag'
df6['model'] = 'ukrainenow'


# Single table of all answers, reshaped to the question/model/answer columns
# that display_answers() filters on.
# NOTE(review): assumes the documents carry `instruction`, `input` and
# `output` fields -- drop() raises KeyError if no `input` column exists.
combined_df = pd.concat([df1, df2, df3, df4, df5, df6], ignore_index=True)
combined_df.drop('input',axis=1,inplace=True)
combined_df.rename(columns={'instruction': 'question', 'output': 'answer'}, inplace=True)

# Names of the competing models; each is also the id of that model's
# document in the `votes` collection.
models = ['codekobzar','gpt-4','llama-2-70b-chat','sherlock-no-rag','sherlock-rag','ukrainenow']#list_models()

# Make sure every model has a vote document before the first vote arrives.
votes_ref = db.collection('votes')
for model in models:
    vote_doc = votes_ref.document(model).get()
    if vote_doc.exists:
        print("-------")
    else:
        # Write all initial fields in one set(). Without merge, set()
        # replaces the whole document, so the three separate calls used
        # previously left only the last field (`elo_rating`) behind.
        votes_ref.document(model).set({
            'win_count': 0,
            'loss_count': 0,
            'elo_rating': 25,
        })



# Placeholder content shown until the user presses START!. The placeholder
# model names '1' and '2' are not in `models`.
random_question = 'Click any button to start!'
random_model1, random_model2 = '1', '2'
answer1, answer2 = display_answers(random_question, random_model1, random_model2,combined_df)

# Pool of question texts that update_symbols() samples from, read from the
# `question_text` field of each document in the `questions` collection.
questions = []
questions_ = fetch_questions()
for question in questions_:
    questions.append(question['question_text'])

# Module-level handle to the `votes` collection (same reference as above).
votes_ref = db.collection('votes')


def create_app():
    """Build the Gradio Blocks UI and wire up its event handlers.

    The layout is: the question, two answer columns (each with a hidden
    model-name component), a row of three vote buttons, a START! button,
    the leaderboard table and a refresh button. The vote buttons start out
    non-interactive; ``update_b`` (bound to START!) enables them.

    Returns:
        The configured ``gr.Blocks`` app, not yet launched.
    """

    # Debug output of the module-level placeholder state.
    print('-----------------------')
    print(random_question)
    print(random_model1)
    print('-----!!!!!!!!!!!!!')

    with gr.Blocks() as app:
        q = gr.Markdown(f"### Question: {random_question}")

        with gr.Row():
            with gr.Column():
                # Hidden component carrying the model name to the vote handlers.
                m1 = gr.Markdown(f"{random_model1}", visible=False)
                a1 = gr.Markdown(answer1)

            with gr.Column():
                m2 = gr.Markdown(f"{random_model2}", visible=False)
                a2 = gr.Markdown(answer2)

        with gr.Row():
            # Disabled until START! has loaded a real comparison.
            b1 = gr.Button("Vote for Model 1",interactive=False)
            b2 = gr.Button("It’s a tie!",interactive=False)
            b3 = gr.Button("Vote for Model 2",interactive=False)
        with gr.Row():
            b4 = gr.Button("START!", interactive=True)
        #with gr.Row():
        #    b5 = gr.Button("Show Leaderboard")

        # The leaderboard is fetched once while the UI is built; afterwards
        # it only changes when the refresh button is pressed.
        initial_leaderboard_data = fetch_and_format_leaderboard()
        #leaderboard_display = gr.Textbox(value=initial_leaderboard_data,label="Leaderboard", placeholder="Leaderboard will be displayed here.",lines=30, visible=True)
        leaderboard_display = gr.Dataframe(value=initial_leaderboard_data, label="Leaderboard")
        #b5.click(fn=fetch_and_format_leaderboard, inputs=[], outputs=leaderboard_display)

        # START! also updates the four buttons; the vote handlers only
        # refresh the question, model and answer components.
        b4.click(update_b, inputs=[q,m1,a1,m2,a2], outputs=[q,m1,a1,m2,a2,b1,b2,b3, b4])
        b1.click(update_symbols1, inputs=[q,m1,a1,m2,a2], outputs=[q,m1,a1,m2,a2])
        b2.click(update_symbols2, inputs=[q, m1, a1, m2, a2], outputs=[q, m1, a1, m2, a2])
        b3.click(update_symbols3, inputs=[q, m1, a1, m2, a2], outputs=[q, m1, a1, m2, a2])
        leaderboard_button = gr.Button("Refresh Leaderboard")
        leaderboard_button.click(fn=fetch_and_format_leaderboard, inputs=[], outputs=leaderboard_display)

    return app

# Build the UI and start the Gradio server when the module is executed.
app = create_app()
app.launch()