Spaces:
Runtime error
Runtime error
# %% | |
from haystack.document_stores import FAISSDocumentStore

# Vector store that will hold the article documents and their embeddings.
# "Flat" is passed straight through as the FAISS index factory string.
# NOTE(review): no sql_url is given, so the store falls back to its default
# SQL backend -- presumably a local SQLite file that survives re-runs; confirm
# that is wanted before executing this cell more than once.
document_store = FAISSDocumentStore(
    faiss_index_factory_str="Flat",
)
# %% | |
import pandas as pd

# Read the article table and turn every row into a Haystack document dict
# ({"content": ..., "meta": {...}}), then index all of them in the store.
df_document = pd.read_csv("data/articles.csv")

articles = []
for _, row in df_document.iterrows():
    article = {
        "content": row["article"],
        "meta": {
            "chapter_name": row["chapter_name"],
            "article_page": row["article_page"],
            "article_number": row["article_number"],
            "article_name": row["article_name"],
        },
    }
    # The retriever in this script is created with embed_title=True, and
    # Haystack's DensePassageRetriever takes the title from meta["name"].
    # Without this key the title is silently treated as empty, so mirror the
    # article name there. Missing (NaN) names are skipped so that no
    # non-string title reaches the tokenizer.
    if pd.notna(row["article_name"]):
        article["meta"]["name"] = str(row["article_name"])
    articles.append(article)

document_store.write_documents(articles, index="document")
print(f"Loaded {document_store.get_document_count()} documents")
# %% | |
from haystack.nodes import DensePassageRetriever

# A single checkpoint is used for both the query and the passage encoder.
# NOTE(review): the checkpoint name says "passage_encoder", yet it is also
# used to embed queries -- confirm this is intentional and that no dedicated
# question-encoder checkpoint should be used instead.
dpr_checkpoint = "sadakmed/dpr-passage_encoder-spanish"

# Dense retriever running on CPU; queries are capped at 64 tokens and
# passages at 384 tokens.
retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model=dpr_checkpoint,
    passage_embedding_model=dpr_checkpoint,
    max_seq_len_query=64,
    max_seq_len_passage=384,
    batch_size=16,
    use_gpu=False,
    embed_title=True,
    use_fast_tokenizers=True,
)

# Compute the embeddings for the documents already written to the store.
document_store.update_embeddings(retriever)
# %% | |
from haystack.nodes import FARMReader

# Spanish SQuAD2-style checkpoint used to extract the answer spans.
model_ckpt = "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"

# Extractive reader running on CPU with its progress bar disabled.
reader = FARMReader(
    model_name_or_path=model_ckpt,
    # Input windowing: 384-token sequences with a stride of 128.
    max_seq_len=384,
    doc_stride=128,
    # Keep "no answer" predictions among the results.
    return_no_answer=True,
    use_gpu=False,
    progress_bar=False,
)
# %% | |
from haystack.pipelines import ExtractiveQAPipeline

# Chain the retriever and the reader into one question-answering pipeline.
pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)
# %% | |
# Example query, run end to end through the pipeline: the retriever passes on
# its 10 best documents and the reader returns its 5 best answers.
question = "pueblos originarios justicia"
prediction = pipe.run(
    query=question,
    params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}},
)
# %% | |
from pprint import pprint

# Dump the raw prediction object for inspection.
pprint(prediction)
# %%
from haystack.utils import print_answers

# Condensed view of the extracted answers.
print_answers(prediction, details="minimum")
# %% | |