# Spaces: Runtime error  (apparently a status banner from the hosting page captured with this file; not part of the program)
import os

import streamlit as st
from datasets import load_dataset

# Access token read from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.getenv("HF_TOKEN")

# Page configuration is issued before any other Streamlit output.
st.set_page_config(page_title="SelfCheck", layout="wide")
st.title("SelfCheck scores")
def load_data(min_score=0.4, exclude_stories=True):
    """Load the SelfCheck sample dataset and apply the requested filters.

    Args:
        min_score: Rows are kept only when their ``passage_score`` is at
            least this value.
        exclude_stories: When true, rows whose ``format`` contains
            ``"story"`` are dropped as well.

    Returns:
        The filtered ``train`` split.
    """
    def scores_high_enough(row):
        return row["passage_score"] >= min_score

    def is_not_story(row):
        return "story" not in row["format"]

    dataset = load_dataset(
        "loubnabnl/comsop_450_samples_detailed",
        split="train",
        token=HF_TOKEN,
        num_proc=2,
    )
    dataset = dataset.filter(scores_high_enough)
    if not exclude_stories:
        return dataset
    return dataset.filter(is_not_story)
# --- Filter controls -------------------------------------------------------
exclude_stories = st.checkbox("Exclude stories", False)
# The slider's upper bound drops to 0.7 when stories are excluded.
# NOTE(review): presumably no non-story sample scores above 0.7 - confirm.
maximum_score = 0.7 if exclude_stories else 1.0
min_value = st.slider(
    'Select minimum selfcheck score', 0.0, maximum_score, 0.1, key='min_score'
)

# Load data based on slider values and checkbox status
ds = load_data(min_score=min_value, exclude_stories=exclude_stories)

# If the filters leave nothing, the sample picker below would be created with
# max_value=-1 (below its min_value/value of 0) and ds[index] would have no
# row to return. Report the situation and end this script run instead.
if len(ds) == 0:
    st.warning(
        "No samples match the current filters; "
        "lower the minimum score or include stories."
    )
    st.stop()

# --- Sample selection ------------------------------------------------------
index = st.number_input(
    f'Found {len(ds)} samples, choose one',
    min_value=0,
    max_value=len(ds) - 1,
    value=0,
    step=1,
)
# NOTE(review): the label below contains a typo ("diplayed"); the text is
# left unchanged here.
min_score = st.number_input(
    'Choose threshold for diplayed inconsistent sentences',
    min_value=0.2,
    max_value=1.0,
    value=0.4,
    step=0.1,
)

sample = ds[index]
st.markdown(f"**Passage Score:** {sample['passage_score']:.2f}, **seed data**: {sample['seed_data']}, **format**: {sample['format']}.")
st.markdown("---")

st.subheader("📕 Generated text")
st.markdown(sample['original_text'])

# --- Inconsistent sentences ------------------------------------------------
# Keep only sentences scoring strictly above the threshold, highest first.
st.subheader(f"🤔 Sentences with a high inconsistency score (> {min_score})")
sentences = sorted(
    (e for e in sample["sentences_and_scores"] if e["score"] > min_score),
    key=lambda d: d['score'],
    reverse=True,
)
for i, s in enumerate(sentences):
    st.markdown(f"**Sentence {i}** with score {s['score']:.2f}:\n{s['sentence']}")
    # NOTE(review): the source's indentation was lost; the separator is
    # assumed to follow each sentence rather than only the last one - confirm.
    st.markdown("---")