|
import streamlit as st |
|
import datasets |
|
|
|
# Configure the page before anything else: Streamlit expects set_page_config
# to be the first Streamlit command executed on a page.
st.set_page_config(layout="wide", page_title="HumanEval-V Viewer")


@st.cache_resource
def _load_benchmark(path="humaneval_v_test_hf"):
    """Load the benchmark dataset stored on disk at *path*.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded dataset as a shared resource avoids re-reading it from disk
    on each rerun.
    """
    return datasets.load_from_disk(path)


humaneval_v_data = _load_benchmark()

# Intro banner (a Markdown blockquote wrapping italic HTML) followed by a
# horizontal rule.
st.markdown(
    "> <i>This is a viewer for the HumanEval-V benchmark, which consists of 108 coding tasks. "
    "Use the navigation buttons or enter an index to browse the tasks. "
    "If you encounter any issues, we encourage you to start a discussion "
    "[here](https://huggingface.co/datasets/HumanEval-V/HumanEval-V-Benchmark/discussions)</i>.",
    unsafe_allow_html=True,
)
st.markdown("---")

# Highest valid 1-based task index. Derived from the loaded dataset instead of
# being hard-coded, so the selector stays in range if the dataset size changes.
max_index = len(humaneval_v_data)

# Seed the stored index on the first run of a session only; later reruns keep
# whatever value is already in session state.
if "index" not in st.session_state:
    st.session_state.index = 1

# Header row: page title in the first column, index selector in the second.
# The third (widest) column is not used in this script.
buttons = st.columns([2, 1.1, 5.9])

with buttons[0]:
    st.markdown("# HumanEval-V Viewer")

with buttons[1]:
    # The upper bound uses max_index so the widget enforces exactly the range
    # advertised in its own label rather than a separately hard-coded number.
    index_input = st.number_input(
        f"Go to index (1-{max_index}):",
        min_value=1,
        max_value=max_index,
        value=st.session_state.index,
        key="index_input",
        help="Enter an index and jump to that index.",
        step=1,
    )

# The selector value is 1-based; shift it by one to index the dataset.
coding_task = humaneval_v_data[index_input - 1]

# Pull out the fields shown by the viewer, reading the record's keys in a
# fixed order and binding them to the names used further down the script.
qid, image, function_signature, ground_truth, test_script = (
    coding_task[field]
    for field in (
        "qid",
        "image",
        "function_signature",
        "ground_truth_solution",
        "test_script",
    )
)

# Task view: a narrow column with the question ID and image, and a wide
# column with the task's code sections.
upper_columns = st.columns([2, 7])

with upper_columns[0]:
    st.markdown(f"### Question ID: {qid}")
    # NOTE(review): newer Streamlit releases deprecate use_column_width in
    # favour of other sizing arguments; left unchanged here -- confirm against
    # the deployed Streamlit version before switching.
    st.image(image, use_column_width=True)
    st.markdown("---")

with upper_columns[1]:
    # st.code displays each snippet verbatim. Interpolating the snippet into a
    # Markdown code fence breaks as soon as the snippet itself contains a
    # ``` line, because that line closes the fence early.
    for heading, snippet in (
        ("### Function Signature:", function_signature),
        ("### Test Script:", test_script),
        ("### Ground Truth Solution:", ground_truth),
    ):
        st.markdown(heading)
        st.markdown("")  # empty element kept from the original layout
        st.code(snippet, language="python")