borrador_constitucion_chile / qa_pipeline_faiss.py
palegre
Add application file beta.
b19c8bc
raw
history blame
1.88 kB
# %%
from haystack.document_stores import FAISSDocumentStore

# Vector store shared by the indexing and retrieval cells below.
# "Flat" is handed to Haystack unchanged as the FAISS index-factory string.
# NOTE(review): no sql_url / index path is passed, so Haystack's defaults
# decide where document text and metadata are persisted -- confirm that
# re-running this cell against a previously created store is intended.
document_store = FAISSDocumentStore(
    faiss_index_factory_str="Flat",
)
# %%
import pandas as pd

# Read the articles table and convert every row into a Haystack document
# dict: the article text becomes "content", the remaining columns go to "meta".
df_document = pd.read_csv("data/articles.csv")

articles = []
for _, row in df_document.iterrows():
    meta = {
        "chapter_name": row["chapter_name"],
        "article_page": row["article_page"],
        "article_number": row["article_number"],
        "article_name": row["article_name"],
    }
    # The DensePassageRetriever in this file is built with embed_title=True,
    # and Haystack takes the passage title from meta["name"]. Without this key
    # every passage is embedded with an empty title. Rows whose name cell is
    # empty (NaN after read_csv) are skipped so no non-string title is stored.
    if pd.notna(row["article_name"]):
        meta["name"] = str(row["article_name"])
    articles.append({"content": row["article"], "meta": meta})

# Persist the documents, then report how many the store now holds.
document_store.write_documents(articles, index="document")
print(f"Loaded {document_store.get_document_count()} documents")
# %%
from haystack.nodes import DensePassageRetriever

# One checkpoint is used for both DPR encoders.
# NOTE(review): this is the *passage* encoder checkpoint and it is also used
# as the query encoder -- confirm this is intentional and not a missing
# question-encoder checkpoint.
dpr_checkpoint = "sadakmed/dpr-passage_encoder-spanish"

retriever = DensePassageRetriever(
    document_store=document_store,
    # Encoders
    query_embedding_model=dpr_checkpoint,
    passage_embedding_model=dpr_checkpoint,
    # Token limits for queries and passages
    max_seq_len_query=64,
    max_seq_len_passage=384,
    # Encoding options
    embed_title=True,
    use_fast_tokenizers=True,
    # Runtime: CPU only, 16 items per batch
    batch_size=16,
    use_gpu=False,
)

# Have the store (re)compute embeddings for its documents with this retriever.
document_store.update_embeddings(retriever)
# %%
from haystack.nodes import FARMReader

# Checkpoint id of the extractive reader model.
model_ckpt = "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"

reader = FARMReader(
    model_name_or_path=model_ckpt,
    # Windowing: 384-token sequences with a stride of 128
    max_seq_len=384,
    doc_stride=128,
    # Allow the reader to return "no answer" as a candidate
    return_no_answer=True,
    # Runtime: CPU only, no progress bar
    use_gpu=False,
    progress_bar=False,
)
# %%
from haystack.pipelines import ExtractiveQAPipeline

# Combine the retriever and the reader into one extractive question-answering
# pipeline.
pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)
# %%
# Example query, run through the pipeline with a top_k of 10 for the
# "Retriever" node and 5 for the "Reader" node.
question = "pueblos originarios justicia"
node_params = {
    "Retriever": {"top_k": 10},
    "Reader": {"top_k": 5},
}
prediction = pipe.run(query=question, params=node_params)
# %%
from pprint import pprint

# Dump the raw prediction returned by the pipeline.
pprint(prediction)
# %%
from haystack.utils import print_answers

# Print the answers using Haystack's helper at the "minimum" detail level.
print_answers(prediction, details="minimum")
# %%