Spaces:
Runtime error
Runtime error
File size: 1,240 Bytes
8404ae6 37958d9 8404ae6 37958d9 8404ae6 0b8fc5d 37958d9 a42b85f e44114f 37958d9 8404ae6 37958d9 c920449 8404ae6 37958d9 c920449 8404ae6 37958d9 8404ae6 04575d6 c920449 e6c3185 04575d6 7ecf31e 04575d6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import html

import streamlit as st
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
def search(text, model, ds, n, snippet_len=150):
    """Return the ``n`` indexed rows nearest to ``text``.

    Args:
        text: Query string; passed unchanged to ``model.encode``.
        model: Object exposing ``encode(text)``. Its result is handed to the
            index lookup as the query.
        ds: Object exposing ``get_nearest_examples(index_name, query, k=...)``
            with an index named ``"embedding"``. The retrieved examples must
            provide ``"title"``, ``"link"`` and ``"content"`` columns.
        n: Number of neighbours to request from the index.
        snippet_len: Maximum number of characters of each row's content to
            keep. Defaults to 150, the value that used to be hard-coded.

    Returns:
        A list of ``(title, snippet, url, score)`` tuples, in the order the
        index lookup returned them.
    """
    query_vector = model.encode(text)
    scores, examples = ds.get_nearest_examples("embedding", query_vector, k=n)
    # Only a short preview of each article body is returned to the caller.
    snippets = [body[:snippet_len] for body in examples["content"]]
    return list(zip(examples["title"], snippets, examples["link"], scores))
# allow_output_mutation=True: per the Streamlit docs this stops st.cache from
# hashing the returned object to detect mutation, so the cached object is
# handed back as-is. The returned dataset carries a FAISS index, which is not
# a plain hashable/serialisable Python value.
# NOTE(review): on Streamlit >= 1.18 the documented replacement for this use
# case is st.cache_resource; switch once the deployed version is confirmed.
@st.cache(allow_output_mutation=True)
def get_dataset():
    """Load the news dataset and attach a FAISS index to it.

    Returns:
        The ``"train"`` split of ``justinian336/salvadoran-news-embedded``
        with a FAISS index built over its ``"embedding"`` column.
    """
    ds = load_dataset("justinian336/salvadoran-news-embedded")["train"]
    ds.add_faiss_index(column="embedding")
    return ds
def get_model():
    """Ensure ``st.session_state["model"]`` holds the sentence encoder.

    The ``justinian336/chupeto`` SentenceTransformer is created only when the
    session state has no ``"model"`` entry yet. Nothing is returned; callers
    read the model from ``st.session_state["model"]``.
    """
    state = st.session_state
    if "model" in state:
        return
    state["model"] = SentenceTransformer("justinian336/chupeto")
# Prepare the indexed dataset and the encoder before drawing the page.
ds = get_dataset()
get_model()

st.markdown("# Buscador de Noticias Salvadoreñas")
search_text = st.text_input(label="Búsqueda")

# An empty text box means there is nothing to search for yet.
if search_text:
    search_results = search(search_text, st.session_state["model"], ds, 10)
    for title, content, url, _ in search_results:
        # These values come from dataset rows and are rendered with
        # unsafe_allow_html=True, so escape them to keep stray "<", "&" or
        # quotes from breaking (or injecting into) the generated markup.
        safe_title = html.escape(str(title))
        safe_content = html.escape(str(content))
        safe_url = html.escape(str(url), quote=True)
        st.markdown(f"""<div><a href="{safe_url}">{safe_title}</a></div>""", unsafe_allow_html=True)
        st.markdown(f"""<div>{safe_content}...</div>""", unsafe_allow_html=True)
        st.markdown("---")