Spaces:
Running
Running
import streamlit as st | |
from langchain_community.embeddings import HuggingFaceBgeEmbeddings | |
from langchain_pinecone import PineconeVectorStore | |
from langchain_community.vectorstores.utils import DistanceStrategy | |
def load_bge_embeddings():
    """Create the BGE embedding function.

    Returns:
        A ``HuggingFaceBgeEmbeddings`` instance for the
        ``BAAI/bge-small-en-v1.5`` model, configured to run on the CPU,
        to normalize the embeddings it produces, and to use a fixed
        retrieval instruction for queries.
    """
    return HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-small-en-v1.5",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
        query_instruction="Represent this question for searching relevant passages: ",
    )
def load_pinecone_vectorstore():
    """Create the Pinecone vector store backed by the BGE embeddings.

    The Pinecone API key and index name are read from Streamlit secrets
    (``pinecone_api_key`` and ``pinecone_index_name``).

    Returns:
        A ``PineconeVectorStore`` using cosine distance, with the document
        text stored under the ``"text"`` key.
    """
    # Build the embedding function first, before touching any secrets.
    embedder = load_bge_embeddings()
    store_options = {
        "embedding": embedder,
        "text_key": "text",
        "distance_strategy": DistanceStrategy.COSINE,
        "pinecone_api_key": st.secrets["pinecone_api_key"],
        "index_name": st.secrets["pinecone_index_name"],
    }
    return PineconeVectorStore(**store_options)
def get_vectorstore_filter(ret_config: dict) -> dict:
    """Translate retrieval settings into a vector store metadata filter.

    Args:
        ret_config: Mapping that must contain the keys
            ``filter_legis_id``, ``filter_bioguide_id``,
            ``filter_congress_nums`` and ``filter_sponsor_parties``.

    Returns:
        A new dict. ``legis_id`` and ``sponsor_bioguide_id`` are exact-match
        entries that are only present when the corresponding setting is not
        the empty string; ``congress_num`` and ``sponsor_party`` are always
        present as ``{"$in": ...}`` clauses.

    Raises:
        KeyError: If any of the required keys is missing from ``ret_config``.
    """
    conditions = {}

    # Optional exact-match filters: an empty string means "not set".
    legis_id = ret_config["filter_legis_id"]
    if legis_id != "":
        conditions["legis_id"] = legis_id

    bioguide_id = ret_config["filter_bioguide_id"]
    if bioguide_id != "":
        conditions["sponsor_bioguide_id"] = bioguide_id

    # Membership filters are applied unconditionally.
    conditions["congress_num"] = {"$in": ret_config["filter_congress_nums"]}
    conditions["sponsor_party"] = {"$in": ret_config["filter_sponsor_parties"]}
    return conditions