# Streamlit app for browsing the "SaulLu/bloom-generations" dataset: pick a
# language and a prompt, then inspect one of the matching generations together
# with the configuration that produced it.
#
# NOTE(review): the copy of this file under review had its inline HTML stripped
# (string literals were broken across physical lines). The `<br>`, `<pre>` and
# `<h1 ...>` markup below is a reconstruction -- confirm against the deployed
# version before merging.
import html

import streamlit as st
from datasets import load_dataset

st.set_page_config(
    page_icon="🧊",
    layout="wide",
)
st.write(
    "This is an application for viewing different generations for the same prompt. The generations vary depending on the checkpoint used and also the parameters used for the generation."
)

HF_API_TOKEN = st.secrets["HF_API_TOKEN"]
# NOTE(review): PROMPT_COLOR is not referenced anywhere in the visible code.
PROMPT_COLOR = "#CA437E"


def safe_text(text: str) -> str:
    """Return *text* as an HTML fragment suitable for ``unsafe_allow_html=True``.

    HTML special characters are escaped first so that dataset content is shown
    verbatim instead of being interpreted as markup; newlines are then turned
    into ``<br>`` so line breaks survive Markdown/HTML rendering.
    """
    escaped = html.escape(text)
    # Escape *before* inserting our own tags, otherwise the <br> would be
    # escaped as well.
    escaped = escaped.replace("\n", "<br>")
    return f"<pre>{escaped}</pre>"


def prompt_markup_format(text: str) -> str:
    """Wrap *text* in a black ``<font>`` element.

    The previous literal started with ``<*font`` and had no closing tag, which
    is not valid markup; it is emitted here as a well-formed element.
    """
    return f'<font color="black">{text}</font>'


def generation_markup_format(text: str) -> str:
    """Return the markup used to display a generation (currently unchanged).

    NOTE(review): this is an identity function in the reviewed copy; a colour
    wrapper (possibly using PROMPT_COLOR) may have been lost -- confirm.
    """
    return f"{text}"


# NOTE(review): recent `datasets` releases rename `use_auth_token` to `token`;
# kept as-is because the installed version is not known from this file.
ds = load_dataset("SaulLu/bloom-generations", use_auth_token=HF_API_TOKEN)
ds = ds["train"]

possible_langs = ds.unique("lang")

col_1, col_2 = st.columns(2)

with col_1:
    st.markdown(
        "<h1 style='text-align: center'>Prompt</h1>", unsafe_allow_html=True
    )
    chosen_lang = st.selectbox("Choose a lang", possible_langs + ["all"])
    if chosen_lang == "all":
        ds_lang = ds
    else:
        ds_lang = ds.filter(
            lambda exs: [lang == chosen_lang for lang in exs["lang"]], batched=True
        )

    possible_prompts = ds_lang.unique("prompt")
    chosen_prompt = st.selectbox("Choose a prompt", possible_prompts)
    # The selectbox yields None when there is nothing to choose from; only
    # render an actual prompt.
    if chosen_prompt is not None:
        st.markdown(safe_text(chosen_prompt), unsafe_allow_html=True)

    sub_ds = ds_lang.filter(
        lambda exs: [prompt == chosen_prompt for prompt in exs["prompt"]],
        batched=True,
    )

with col_2:
    st.markdown(
        "<h1 style='text-align: center'>Generation</h1>", unsafe_allow_html=True
    )

    # Without this guard the index widget below would be created with
    # max_value=-1 < min_value=0, which Streamlit rejects with an exception.
    if len(sub_ds) == 0:
        st.warning("No generation matches the current selection.")
        st.stop()

    index_sample = st.number_input(
        "Index of the chosen generation",
        min_value=0,
        max_value=len(sub_ds) - 1,
        value=0,
        step=1,
    )
    sample = sub_ds[index_sample]
    generation = sample["generation"]

    stop_index_sample = st.number_input(
        "Stop generation at character number",
        min_value=0,
        max_value=len(generation),
        value=len(generation),
        step=1,
    )
    # Truncate the raw text first and escape afterwards, so the cut never lands
    # in the middle of an HTML entity.
    markdown_text = generation_markup_format(
        safe_text(generation[:stop_index_sample])
    )
    st.markdown(markdown_text, unsafe_allow_html=True)

# NOTE(review): whether this section lived inside `col_2` or below both columns
# cannot be recovered from the flattened source; it is placed below them here.
st.markdown(
    "<h1 style='text-align: center'>Generation configuration</h1>",
    unsafe_allow_html=True,
)
# Everything except the prompt and the generated text describes how the sample
# was produced.
config = {
    key: value for key, value in sample.items() if key not in ["prompt", "generation"]
}
# Explicit write instead of a bare expression, so the output does not depend on
# Streamlit's "magic" feature being enabled.
st.write(config)