File size: 2,674 Bytes
c173eef
 
 
 
5cc465a
c173eef
5cc465a
c173eef
 
92274b2
 
 
5cc465a
 
c173eef
 
 
6eb17e2
5cc465a
 
 
 
 
 
c173eef
911c9b4
 
c173eef
 
5cc465a
c173eef
 
 
5cc465a
6713f3f
c173eef
 
 
 
911c9b4
 
5cc465a
c173eef
6713f3f
5cc465a
6713f3f
 
92274b2
6713f3f
 
5cc465a
 
 
911c9b4
 
 
6713f3f
911c9b4
 
 
 
 
 
 
c173eef
 
911c9b4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import os
import streamlit as st
import json
import tarfile
from huggingface_hub import HfFileSystem

# Handle on the Hugging Face Hub file system; the token comes from the
# HF_TOKEN environment variable and is None when that variable is unset.
hf_fs = HfFileSystem(token=os.environ.get("HF_TOKEN"))
st.set_page_config(layout="wide")

# Hide overflow on the main container so the page shows no scroll bar.
st.html("<style> .main {overflow: hidden} </style>")

# Dataset repository that holds the evaluation outputs, and the directory
# inside it that this app browses.
DATASET_ID: str = "LLM360/k2-eval-gallery"
EVAL_DIR: str = os.path.join("datasets", DATASET_ID, "k2-eval-results")

st.title("K2 Evaluation Gallery")
st.markdown(
    "The K2 gallery allows one to browse the output of various evaluations "
    "on intermediate K2 checkpoints, which provides an intuitive "
    "understanding on how the model develops and improves over time."
)


def hf_listdir(parent_dir: str):
    """Return a generator over the base names of the entries in *parent_dir*.

    The listing is requested from ``hf_fs`` immediately; only the reduction of
    each returned path to its final component is deferred until iteration.
    The result can be consumed once.
    """
    entries = hf_fs.ls(parent_dir, detail=False)
    return (os.path.basename(entry) for entry in entries)


with st.sidebar:
    # Two logos side by side. A plain string literal: nothing is interpolated,
    # so no f-string is needed.
    html = "<img src='https://www.llm360.ai/images/logo-highres.png' width='100' /><img src='https://huggingface.co/spaces/LLM360/k2-eval-gallery/raw/main/k2-logo.svg' width='100' />"
    st.markdown(html, unsafe_allow_html=True)

    # Benchmark to inspect: one option per entry directly under EVAL_DIR.
    # `metric` is read later by render_column.
    metric = st.radio(
        "Choose a metric", options=hf_listdir(EVAL_DIR), help="type of evaluation benchmark task"
    )

    # Few-shot setting: one option per entry under the chosen metric.
    # `n_shot` is read later by render_column.
    n_shot = st.radio(
        "Select an n-shot number", hf_listdir(os.path.join(EVAL_DIR, metric)),
        help="number of examples included in few-shot prompting"
    )

# Two page columns; each one hosts an independent checkpoint viewer so two
# checkpoints can be inspected next to each other.
col1, col2 = st.columns(2)

def _load_first_json(fileobj):
    """Parse the first regular file of a gzip-compressed tar stream as JSON.

    Args:
        fileobj: Binary file-like object holding a ``.tar.gz`` archive.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If the archive holds no regular file, or the member is
            not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
        tarfile.TarError: If the stream is not a readable archive.
    """
    with tarfile.open(fileobj=fileobj, mode="r:gz") as tar:
        for member in tar:
            # Directory and other non-regular entries carry no payload.
            if not member.isfile():
                continue
            with tar.extractfile(member) as payload:
                return json.load(payload)
    raise ValueError("archive contains no regular file to display")


def render_column(col_label: str) -> None:
    """Render one checkpoint viewer inside the current Streamlit container.

    Draws a checkpoint slider and a file picker, loads the selected archive
    and displays its JSON content; list-shaped content is browsed one element
    at a time. The benchmark and few-shot setting are taken from the
    module-level ``metric`` and ``n_shot`` sidebar selections.

    Args:
        col_label: Label for this viewer (e.g. ``'A'``). It is shown in the
            header and prefixes every widget key, so several viewers on the
            same page keep separate widget state.
    """
    suffix, result_file = ".tar.gz", "results.json"
    shot_dir = os.path.join(EVAL_DIR, metric, n_shot)

    st.header(f"Checkpoint {col_label}")
    checkpoints = sorted(hf_listdir(shot_dir))
    if not checkpoints:
        # Nothing to select from; stop before building a slider with no options.
        st.info("No checkpoints are available for this selection.")
        return
    ckpt = st.select_slider(
        'Select a checkpoint', checkpoints,
        key=col_label + '1', help="checkpoint index from 3 to 360",
    )
    st.write(f'Viewing Evaluation Results for Checkpoint: `{ckpt}`')

    ckpt_dir = os.path.join(shot_dir, ckpt)
    # Offer only real archives: stripping the suffix from an unrelated file
    # name would produce an entry that cannot be opened afterwards.
    file_list = sorted(
        f_name[:-len(suffix)]
        for f_name in hf_listdir(ckpt_dir)
        if f_name.endswith(suffix)
    )
    if not file_list:
        st.info("No result archives are available for this checkpoint.")
        return
    # Keep the aggregate results file at the end of the picker.
    if result_file in file_list:
        file_list.remove(result_file)
        file_list.append(result_file)
    selected = st.selectbox(
        "Select a file", file_list, key=col_label + '2',
        help="a list of raw output files from evaluation results",
    )

    # Close the remote handle as well as the archive once the JSON is parsed.
    with hf_fs.open(os.path.join(ckpt_dir, selected + suffix), "rb") as remote:
        eval_json = _load_first_json(remote)

    if not isinstance(eval_json, list):
        st.json(eval_json)
        return
    if not eval_json:
        # An empty list would ask the slider for the range 0..-1.
        st.info("This file contains no documents.")
        return
    doc_id = 0
    if len(eval_json) > 1:
        # A slider is only meaningful with at least two documents to pick from.
        doc_id = st.slider(
            "Select a document id", 0, len(eval_json) - 1, 0, 1,
            key=col_label + '3',
            help="index of a specific question/task in current file",
        )
    st.json(eval_json[doc_id])

# Render viewer 'A' in the first column, then viewer 'B' in the second.
for _column, _label in ((col1, 'A'), (col2, 'B')):
    with _column:
        render_column(_label)