# Spaces: Runtime error
import html

import streamlit as st
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
def search(text, model, ds, n):
    """Look up the ``n`` dataset entries nearest to a free-text query.

    The query is embedded with ``model.encode`` and matched against the
    dataset's ``'embedding'`` index via ``ds.get_nearest_examples``.

    Args:
        text: Query string to embed.
        model: Object exposing ``encode(text)``.
        ds: Object exposing ``get_nearest_examples(index_name, query, k=...)``
            whose examples contain ``"title"``, ``"link"`` and ``"content"``
            columns.
        n: Number of neighbours to request.

    Returns:
        A list of ``(title, snippet, url, score)`` tuples, where ``snippet``
        is the first 150 characters of the entry's content. The scores are
        passed through as returned by the index (NOTE(review): presumably a
        distance, so lower means closer -- confirm against the index metric).
    """
    query_vector = model.encode(text)
    scores, hits = ds.get_nearest_examples('embedding', query_vector, k=n)
    return [
        (title, body[:150], link, score)
        for title, link, body, score in zip(
            hits["title"], hits["link"], hits["content"], scores
        )
    ]
def get_dataset():
    """Return the embedded news dataset with a FAISS index on ``embedding``.

    Streamlit re-runs the whole script on every user interaction, so loading
    the dataset and building the index unconditionally would repeat that work
    for each keystroke/submit. The prepared dataset is therefore stored in
    ``st.session_state`` (the same caching approach ``get_model`` uses) and
    reused on later reruns of the same session.

    Returns:
        The ``"train"`` split of ``justinian336/salvadoran-news-embedded``
        with a FAISS index attached to its ``embedding`` column.
    """
    if "dataset" not in st.session_state:
        ds = load_dataset("justinian336/salvadoran-news-embedded")["train"]
        ds.add_faiss_index(column="embedding")
        # Store only after indexing succeeds so a failed build is retried.
        st.session_state["dataset"] = ds
    return st.session_state["dataset"]
def get_model():
    """Make sure the sentence-embedding model is present in the session state.

    On the first call in a session the ``justinian336/chupeto``
    ``SentenceTransformer`` is instantiated and stored under
    ``st.session_state["model"]``; later calls find it there and do nothing.
    The function returns ``None``; callers read the model from the session
    state.
    """
    if "model" in st.session_state:
        # Already loaded for this session; nothing to do.
        return
    st.session_state["model"] = SentenceTransformer("justinian336/chupeto")
# Page script: prepare the dataset and model, then render the search UI.
ds = get_dataset()
get_model()

st.markdown("# Buscador de Noticias Salvadoreñas")
search_text = st.text_input(label="Búsqueda")

if search_text:
    search_results = search(search_text, st.session_state["model"], ds, 10)
    for title, content, url, _ in search_results:
        # The fields come from the dataset and are rendered as raw HTML
        # (unsafe_allow_html=True), so escape them to keep stray markup or
        # quotes in an article from breaking or injecting into the page.
        safe_title = html.escape(str(title))
        safe_url = html.escape(str(url), quote=True)
        safe_content = html.escape(str(content))
        st.markdown(f"""<div><a href="{safe_url}">{safe_title}</a></div>""", unsafe_allow_html=True)
        st.markdown(f"""<div>{safe_content}...</div>""", unsafe_allow_html=True)
        # Horizontal rule separating consecutive results.
        st.markdown("---")