loubnabnl's picture
loubnabnl HF staff
Create app.py
8aac646
raw
history blame
1.05 kB
import streamlit as st
from datasets import load_dataset
import os
# Hub access token taken from the environment; None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# The browser tab title and the on-page heading share the same text.
_PAGE_TITLE = "Synthetic textbooks inspection"
st.set_page_config(page_title=_PAGE_TITLE, layout="wide")
st.title(_PAGE_TITLE)
st.markdown("Inspection of synthetic textbooks generated by `Falcon-180B-chat`")
@st.cache_data()
def load_data(source: str = "all"):
    """Load the train split of the synthetic textbooks subset.

    Wrapped in ``st.cache_data`` so the dataset is not re-loaded on every
    Streamlit rerun with the same ``source``.

    Args:
        source: Value of the ``"source"`` column to keep. The default
            ``"all"`` returns the split unfiltered.

    Returns:
        The loaded dataset, restricted to rows whose ``"source"`` field
        equals ``source`` unless ``source`` is ``"all"``.
    """
    # `token` is the current name of the credential argument; the older
    # `use_auth_token` spelling is deprecated in `datasets`.
    ds = load_dataset(
        "HuggingFaceTB/synthetic_textbooks_subset",
        split="train",
        token=HF_TOKEN,
    )
    if source != "all":
        ds = ds.filter(lambda x: x["source"] == source)
    return ds
# Let the user pick which data source to browse ("all" disables filtering).
source = st.selectbox(
    "Data source",
    ['all', 'wikihow', 'khan_academy', 'stanford_courses', 'rw_wikihow', 'rw_stanford'],
)
samples = load_data(source)
n_samples = len(samples)

# With no rows, max_value below would be -1 (< min_value) and the widget
# would raise, so report the empty selection and end this script run.
if n_samples == 0:
    st.warning(f"No samples found for source: {source}")
    st.stop()

index = st.number_input(
    f"Index of the sample (out of {n_samples}):",
    min_value=0,
    max_value=n_samples - 1,
    value=0,
    step=1,
)
st.markdown(f"Displaying source: {source}")

# Show the selected sample; `index` is the value chosen in the widget above.
st.subheader("Prompt")
st.markdown(samples[index]["prompt"])
st.subheader("Textbook")
st.markdown(samples[index]['textbook'])