# File size: 1,877 Bytes
# b19c8bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# %%
from haystack.document_stores import FAISSDocumentStore


# Vector store that will hold the articles and their embeddings.
# "Flat" is passed through as the FAISS index-factory string.
# NOTE(review): every other option is left at the library default, including
# wherever the store keeps its document records — confirm that is what you
# want when this cell is re-run.
document_store = FAISSDocumentStore(faiss_index_factory_str="Flat")
# %%
import pandas as pd

# Load the source articles; every CSV row becomes one document.
df_document = pd.read_csv("data/articles.csv")

# Columns carried over unchanged into each document's metadata.
META_COLUMNS = ("chapter_name", "article_page", "article_number", "article_name")

# Build all documents in one pass over plain-dict records instead of a manual
# iterrows()/append loop: "article" is the text, the rest goes under "meta".
# NOTE(review): blank CSV cells are read by pandas as NaN and rows are passed
# through unfiltered, exactly as before — confirm the CSV has no empty
# "article" cells.
articles = [
    {
        "content": record["article"],
        "meta": {column: record[column] for column in META_COLUMNS},
    }
    for record in df_document.to_dict("records")
]

document_store.write_documents(articles, index="document")
print(f"Loaded {document_store.get_document_count()} documents")
# %%
from haystack.nodes import DensePassageRetriever

# A single checkpoint backs both DPR encoders.
# NOTE(review): the checkpoint is named "passage_encoder" yet it is also used
# to encode queries — confirm this is intentional and not a missing
# question-encoder checkpoint.
dpr_ckpt = "sadakmed/dpr-passage_encoder-spanish"

retriever = DensePassageRetriever(
    document_store=document_store,
    # Encoders and tokenization.
    query_embedding_model=dpr_ckpt,
    passage_embedding_model=dpr_ckpt,
    use_fast_tokenizers=True,
    # Sequence-length limits for queries and passages.
    max_seq_len_query=64,
    max_seq_len_passage=384,
    # NOTE(review): the documents written earlier carry "article_name" but no
    # dedicated title field — verify which meta key embed_title reads.
    embed_title=True,
    # Runtime: CPU only, 16 items per batch.
    batch_size=16,
    use_gpu=False,
)

# Compute and store embeddings for the documents already in the store.
document_store.update_embeddings(retriever)
# %%
from haystack.nodes import FARMReader

# Checkpoint for the extractive reader (per its name, a Spanish BERT
# fine-tuned on a Spanish SQuAD2 variant).
model_ckpt = "mrm8488/distill-bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"

reader = FARMReader(
    model_name_or_path=model_ckpt,
    # Input windowing: sequence length and stride for long passages.
    max_seq_len=384,
    doc_stride=128,
    # Let the reader report "no answer" as a possible prediction.
    return_no_answer=True,
    # Runtime: CPU only, no progress bar output.
    use_gpu=False,
    progress_bar=False,
)
# %%
from haystack.pipelines import ExtractiveQAPipeline

# Chain the two nodes built above into one question-answering pipeline.
pipe = ExtractiveQAPipeline(reader, retriever)
# %%
question = "pueblos originarios justicia"

# Per-node settings, keyed by node name in the `params` mapping below.
retriever_params = {"top_k": 10}
reader_params = {"top_k": 5}

prediction = pipe.run(
    query=question,
    params={"Retriever": retriever_params, "Reader": reader_params},
)
# %%
from pprint import pprint

# Dump the raw pipeline output for inspection.
pprint(prediction)

# %%
from haystack.utils import print_answers


# Same prediction, rendered by Haystack's helper at its "minimum" detail level.
print_answers(prediction, details="minimum")
# %%