Spaces:
Sleeping
Sleeping
andreasmartin
committed on
Commit
·
6017dce
1
Parent(s):
8c2f0ba
deepnote update
Browse files
- faq.py +19 -3
- requirements.txt +3 -1
faq.py
CHANGED
@@ -35,7 +35,7 @@ def create_documents(df: pd.DataFrame, page_content_column: str) -> pd.DataFrame
|
|
35 |
return loader.load()
|
36 |
|
37 |
|
38 |
-
def embedding_function(model_name: str) -> HuggingFaceEmbeddings:
|
39 |
return HuggingFaceEmbeddings(
|
40 |
model_name=model_name,
|
41 |
encode_kwargs={"normalize_embeddings": True},
|
@@ -43,7 +43,7 @@ def embedding_function(model_name: str) -> HuggingFaceEmbeddings:
|
|
43 |
)
|
44 |
|
45 |
|
46 |
-
def vectordb(
|
47 |
faq_id: str, embedding_function: Embeddings, documents: List[Document] = None
|
48 |
) -> VectorStore:
|
49 |
vectordb = None
|
@@ -63,7 +63,23 @@ def vectordb(
|
|
63 |
|
64 |
|
65 |
def similarity_search(
|
66 |
-
vectordb: VectorStore, query: str, k: int
|
67 |
) -> List[Tuple[Document, float]]:
|
68 |
os.environ["TOKENIZERS_PARALLELISM"] = "true"
|
69 |
return vectordb.similarity_search_with_relevance_scores(query=query, k=k)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
return loader.load()
|
36 |
|
37 |
|
38 |
+
def define_embedding_function(model_name: str) -> HuggingFaceEmbeddings:
|
39 |
return HuggingFaceEmbeddings(
|
40 |
model_name=model_name,
|
41 |
encode_kwargs={"normalize_embeddings": True},
|
|
|
43 |
)
|
44 |
|
45 |
|
46 |
+
def get_vectordb(
|
47 |
faq_id: str, embedding_function: Embeddings, documents: List[Document] = None
|
48 |
) -> VectorStore:
|
49 |
vectordb = None
|
|
|
63 |
|
64 |
|
65 |
def similarity_search(
|
66 |
+
vectordb: VectorStore, query: str, k: int = 3
|
67 |
) -> List[Tuple[Document, float]]:
|
68 |
os.environ["TOKENIZERS_PARALLELISM"] = "true"
|
69 |
return vectordb.similarity_search_with_relevance_scores(query=query, k=k)
|
70 |
+
|
71 |
+
|
72 |
+
def load_vectordb_id(faq_id: str, page_content_column: str) -> VectorStore:
|
73 |
+
embedding_function = define_embedding_function("sentence-transformers/all-mpnet-base-v2")
|
74 |
+
vectordb = None
|
75 |
+
try:
|
76 |
+
vectordb = get_vectordb(faq_id=faq_id, embedding_function=embedding_function)
|
77 |
+
except Exception as e:
|
78 |
+
df = read_df(xlsx_url(faq_id))
|
79 |
+
documents = create_documents(df, page_content_column)
|
80 |
+
vectordb = get_vectordb(faq_id=faq_id, embedding_function=embedding_function, documents=documents)
|
81 |
+
return vectordb
|
82 |
+
|
83 |
+
|
84 |
+
def load_vectordb(sheet_url: str, page_content_column: str) -> VectorStore:
|
85 |
+
return load_vectordb_id(faq_id(sheet_url), page_content_column)
|
requirements.txt
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
openpyxl
|
2 |
langchain
|
3 |
sentence_transformers
|
4 |
-
awadb
|
|
|
|
|
|
1 |
openpyxl
|
2 |
langchain
|
3 |
sentence_transformers
|
4 |
+
awadb
|
5 |
+
fastapi
|
6 |
+
uvicorn
|