Spaces:
Runtime error
Runtime error
Jorge Henao
committed on
Commit
·
9e6217b
1
Parent(s):
9735086
pinecone fixes
Browse files- app_pinecode.py +1 -1
- config.py +1 -1
- pinecode_quieries.py +16 -12
- requirements.txt +4 -4
app_pinecode.py
CHANGED
@@ -58,7 +58,7 @@ if __name__ == "__main__":
|
|
58 |
# Every form must have a submit button.
|
59 |
submitted = st.form_submit_button("Buscar")
|
60 |
|
61 |
-
results = search("que es el adres", retriever_top_k=5, reader_top_k=3)
|
62 |
|
63 |
# on submit we execute search
|
64 |
if(submitted):
|
|
|
58 |
# Every form must have a submit button.
|
59 |
submitted = st.form_submit_button("Buscar")
|
60 |
|
61 |
+
#results = search("que es el adres", retriever_top_k=5, reader_top_k=3)
|
62 |
|
63 |
# on submit we execute search
|
64 |
if(submitted):
|
config.py
CHANGED
@@ -2,7 +2,7 @@ class Config():
|
|
2 |
es_host = "saimon-askwdemocracy.es.us-central1.gcp.cloud.es.io"
|
3 |
es_user = "elastic"
|
4 |
es_password = "53f2a7a9-ea9d-4fd2-a8bc-f471b67f0262"
|
5 |
-
proposals_index = "
|
6 |
reader_model_name_or_path = "deepset/xlm-roberta-base-squad2-distilled"
|
7 |
#reader_model_name_or_path = "deepset/xlm-roberta-base-squad2"
|
8 |
use_gpu = True
|
|
|
2 |
es_host = "saimon-askwdemocracy.es.us-central1.gcp.cloud.es.io"
|
3 |
es_user = "elastic"
|
4 |
es_password = "53f2a7a9-ea9d-4fd2-a8bc-f471b67f0262"
|
5 |
+
proposals_index = "docsreloaded"
|
6 |
reader_model_name_or_path = "deepset/xlm-roberta-base-squad2-distilled"
|
7 |
#reader_model_name_or_path = "deepset/xlm-roberta-base-squad2"
|
8 |
use_gpu = True
|
pinecode_quieries.py
CHANGED
@@ -46,6 +46,9 @@ class PineconeRetriever(BaseComponent):
|
|
46 |
self.index = pinecone.Index(index_name)
|
47 |
|
48 |
def run(self, query: str, top_k: Optional[int]):
|
|
|
|
|
|
|
49 |
# process the inputs
|
50 |
vector_embeddings = self.sts_model.encode(query).tolist()
|
51 |
response = self.index.query([vector_embeddings], top_k=top_k, include_metadata=True)
|
@@ -57,7 +60,7 @@ class PineconeRetriever(BaseComponent):
|
|
57 |
'source': d["metadata"]['source']
|
58 |
}
|
59 |
)
|
60 |
-
for d in response[
|
61 |
]
|
62 |
output = {"documents": docs, "query": query}
|
63 |
return output, "output_1"
|
@@ -84,36 +87,37 @@ class PinecodeProposalQueries(DocumentQueries):
|
|
84 |
self.reader = reader
|
85 |
|
86 |
#pinecone.init(api_key=es_password, environment="us-east1-gcp")
|
87 |
-
index_name =
|
88 |
|
89 |
self.document_store = PineconeDocumentStore(
|
90 |
api_key=es_password,
|
91 |
environment = "us-east1-gcp",
|
92 |
index=index_name,
|
93 |
similarity="cosine",
|
94 |
-
embedding_dim=384
|
|
|
95 |
)
|
96 |
self.pipe = Pipeline()
|
97 |
pinecone_retriever = PineconeRetriever("sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
|
98 |
es_password, "us-east1-gcp",
|
99 |
index_name)
|
100 |
-
self.pipe.add_node(component=pinecone_retriever, name="Retriever", inputs=["Query"])
|
101 |
-
self.pipe.add_node(component=self.reader, name="Reader", inputs=["Retriever"])
|
102 |
|
103 |
# #self.retriever = BM25Retriever(document_store = self.document_store)
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
|
111 |
# retriever_model = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
|
112 |
|
113 |
|
114 |
|
115 |
#self.document_store.update_embeddings(self.retriever, update_existing_embeddings=False)
|
116 |
-
|
117 |
#self.pipe = DocumentSearchPipeline(self.retriever)
|
118 |
|
119 |
def search_by_query(self, query : str, retriever_top_k: int, reader_top_k: int, es_index: str = None) :
|
|
|
46 |
self.index = pinecone.Index(index_name)
|
47 |
|
48 |
def run(self, query: str, top_k: Optional[int]):
|
49 |
+
#TODO: problema de versiones del cliente de pinecone. en esta versión retorna una colección de results con los matches.
|
50 |
+
#TODO2: no está el contenido dentro de los metadatos, ni en ningún lado. normal? , workaround: guardarlo en otro metadato.
|
51 |
+
#TODO3: hacer consulta por id, pastelarlo desde el colab que si retorna?
|
52 |
# process the inputs
|
53 |
vector_embeddings = self.sts_model.encode(query).tolist()
|
54 |
response = self.index.query([vector_embeddings], top_k=top_k, include_metadata=True)
|
|
|
60 |
'source': d["metadata"]['source']
|
61 |
}
|
62 |
)
|
63 |
+
for d in response['matches']
|
64 |
]
|
65 |
output = {"documents": docs, "query": query}
|
66 |
return output, "output_1"
|
|
|
87 |
self.reader = reader
|
88 |
|
89 |
#pinecone.init(api_key=es_password, environment="us-east1-gcp")
|
90 |
+
index_name = es_index
|
91 |
|
92 |
self.document_store = PineconeDocumentStore(
|
93 |
api_key=es_password,
|
94 |
environment = "us-east1-gcp",
|
95 |
index=index_name,
|
96 |
similarity="cosine",
|
97 |
+
embedding_dim=384,
|
98 |
+
metadata_config = {"indexed": ["title", "content"]}
|
99 |
)
|
100 |
self.pipe = Pipeline()
|
101 |
pinecone_retriever = PineconeRetriever("sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
|
102 |
es_password, "us-east1-gcp",
|
103 |
index_name)
|
104 |
+
#self.pipe.add_node(component=pinecone_retriever, name="Retriever", inputs=["Query"])
|
105 |
+
#self.pipe.add_node(component=self.reader, name="Reader", inputs=["Retriever"])
|
106 |
|
107 |
# #self.retriever = BM25Retriever(document_store = self.document_store)
|
108 |
+
self.retriever = EmbeddingRetriever(
|
109 |
+
document_store=self.document_store,
|
110 |
+
#embedding_model="multi-qa-distilbert-dot-v1",
|
111 |
+
embedding_model = "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
|
112 |
+
model_format="sentence_transformers"
|
113 |
+
)
|
114 |
|
115 |
# retriever_model = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
|
116 |
|
117 |
|
118 |
|
119 |
#self.document_store.update_embeddings(self.retriever, update_existing_embeddings=False)
|
120 |
+
self.pipe = ExtractiveQAPipeline (reader = self.reader, retriever = self.retriever)
|
121 |
#self.pipe = DocumentSearchPipeline(self.retriever)
|
122 |
|
123 |
def search_by_query(self, query : str, retriever_top_k: int, reader_top_k: int, es_index: str = None) :
|
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
transformers
|
2 |
-
|
3 |
-
|
4 |
-
|
|
|
1 |
+
sentence-transformers
|
2 |
+
farm-haystack[pinecone]
|
3 |
+
pinecone-client
|
4 |
+
streamlit
|