File size: 1,052 Bytes
8aac646
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import streamlit as st
from datasets import load_dataset
import os 

# Hugging Face access token, read from the environment; None when HF_TOKEN is
# not set. It is passed to `load_dataset` inside `load_data`.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Page chrome: wide layout, then the visible title and a one-line description.
st.set_page_config(
    layout="wide",
    page_title="Synthetic textbooks inspection",
)
st.title("Synthetic textbooks inspection")
st.markdown(
    "Inspection of synthetic textbooks generated by `Falcon-180B-chat`"
)

@st.cache_data()
def load_data(source="all"):
    """Load the train split of the synthetic textbooks subset.

    Args:
        source: Value of the ``"source"`` column to keep. The default
            ``"all"`` returns the split unfiltered.

    Returns:
        The loaded dataset, restricted to rows whose ``"source"`` field equals
        ``source`` unless ``source`` is ``"all"``.
    """
    # NOTE(review): newer `datasets` releases appear to prefer `token=` over
    # `use_auth_token=` -- confirm against the pinned version before changing.
    dataset = load_dataset(
        "HuggingFaceTB/synthetic_textbooks_subset",
        split="train",
        use_auth_token=HF_TOKEN,
    )
    if source == "all":
        return dataset

    def _matches_source(row):
        return row["source"] == source

    return dataset.filter(_matches_source)


# Let the user pick which generation source to inspect ('all' = no filtering).
source = st.selectbox("Data source", ['all', 'wikihow','khan_academy', 'stanford_courses', 'rw_wikihow', 'rw_stanford'])
samples = load_data(source)
n_samples = len(samples)

# With zero rows the index widget below would get max_value=-1 < min_value=0,
# so report the situation and end this script run instead.
if n_samples == 0:
    st.warning(f"No samples available for source: {source}")
    st.stop()

index = st.number_input(f"Index of the sample (out of {n_samples}):",  min_value=0, max_value=n_samples-1, value=0, step=1)
st.markdown(f"Displaying source: {source}")

# Show the selected sample. The row is looked up with `index`, the value
# returned by the number input above (the original referenced an undefined
# `index_example`, which raised NameError).
sample = samples[index]

st.subheader("Prompt")
st.markdown(sample["prompt"])

st.subheader("Textbook")
st.markdown(sample['textbook'])