inspect_selfcheck

Runtime error

App Files Files Community

loubnabnl HF staff committed on Mar 7, 2024

Commit

042d897

verified ·

1 Parent(s): e8b851a

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -54

app.py CHANGED Viewed

@@ -4,64 +4,22 @@ import os
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
-st.set_page_config(page_title="Web Clusters inspection", layout="wide")
-st.title("Web clusters inspection")
-st.markdown("""
-We clustered 100k web samples using [text-clustering](https://github.com/huggingface/text-clustering).
-Our approach involved prompting Mixtral to evaluate whether the topics in each cluster are educational or could be considered college material using a score from 1 to 10. \
-Technically, we provide it with 10 random examples from the cluster in the prompt and ask it to judge their topics.
-Additionally, the model was tasked with finding the topic of each cluster (based on the 10 random examples).
-""")
 @st.cache_data
-def load_data(min_score=1, max_score=10, show_special=False):
-    # HuggingFaceTB/FW_clusters_free_topics
-    ds = load_dataset("HuggingFaceTB/FW_clusters_100k_145_topics", split="train", token=HF_TOKEN, num_proc=2)
-    def filter_func(x):
-        try:
-            score = int(x['educational_score'])
-            value = False if show_special else min_score <= score <= max_score
-            return value
-        except (ValueError, TypeError):
-            # Return True if show_special is checked and educational_score is None or ''
-            return show_special
-    ds = ds.filter(filter_func)
     return ds
-st.subheader("Cluster information")
-col_1, col_2, col_3 = st.columns(3)
-with col_1:
-    show_special = st.checkbox('Show only clusters with undefined educational score', False)
-with col_2:
-    min_value = st.slider('Select minimum educational score', 1, 10, 1, key='min_score')
-with col_3:
-    max_value = st.slider('Select maximum educational score', 1, 10, 10, key='max_score')
-# Load data based on slider values and checkbox status
-ds = load_data(min_value, max_value, show_special)
-categories = list(set(ds["category"]))
-selected_category = st.selectbox("Select a topic", categories)
-selected_cluster = ds.filter(lambda x: x['category'] == selected_category)
-# Select sample index
-n_samples = len(selected_cluster)
-if n_samples > 0:
-    col_1, col_2 = st.columns(2)
-    with col_1:
-        index_cluster = st.number_input(f"Found {len(selected_cluster)} clusters, choose one",  min_value=0, max_value=len(selected_cluster)-1, value=0, step=1)
-    files = selected_cluster[index_cluster]["examples"]
-    with col_2:
-        index_example = st.number_input(f"Found {len(files)} files in the cluster, choose one",  min_value=0, max_value=len(files)-1, value=0, step=1)
-    sample = files[index_example]
-    st.markdown(f"**Educational score of the cluster**: {selected_cluster[index_cluster]['educational_score']}")
-    st.markdown(sample)
-else:
-    st.markdown("No files found, change the cluster.")

 HF_TOKEN = os.environ.get("HF_TOKEN", None)
+st.set_page_config(page_title="SelfCheck", layout="wide")
+st.title("SelfCheck scores")
 @st.cache_data
+def load_data(min_score=0.4):
+    """Load the SelfCheck scores dataset, keeping rows at or above `min_score`.
+
+    Reads the "train" split of "HuggingFaceTB/hallucinations_450_samples_scores"
+    (authenticating with HF_TOKEN) and drops every row whose "passage_score"
+    is below `min_score`.
+
+    Args:
+        min_score: inclusive lower bound on "passage_score".
+
+    Returns:
+        The filtered dataset.
+    """
+    ds = load_dataset("HuggingFaceTB/hallucinations_450_samples_scores", split="train", token=HF_TOKEN, num_proc=2)
+    ds = ds.filter(lambda x: x["passage_score"] >= min_score)
     return ds
+# Load data based on the slider value, so moving the slider actually changes
+# which samples are shown (the threshold was previously hard-coded).
+min_value = st.slider('Select minimum selfcheck score', 0.0, 1.0, 0.1, key='min_score')
+ds = load_data(min_score=min_value)
+n_samples = len(ds)
+if n_samples > 0:
+    index = st.number_input(f'Found {n_samples} samples, choose one', min_value=0, max_value=n_samples-1, value=0, step=1)
+    sample = ds[index]
+    st.markdown(f"**Passage Score:** {sample['passage_score']}, seed data: {sample['seed_data']}, format: {sample['format']}.")
+    st.markdown(sample['original_text'])
+else:
+    # An empty selection would give number_input max_value=-1 and make
+    # ds[index] fail, so show a message instead.
+    st.markdown("No samples found, lower the minimum selfcheck score.")