andreasmartin committed on
Commit
6017dce
·
1 Parent(s): 8c2f0ba

deepnote update

Browse files
Files changed (2) hide show
  1. faq.py +19 -3
  2. requirements.txt +3 -1
faq.py CHANGED
@@ -35,7 +35,7 @@ def create_documents(df: pd.DataFrame, page_content_column: str) -> pd.DataFrame
35
  return loader.load()
36
 
37
 
38
- def embedding_function(model_name: str) -> HuggingFaceEmbeddings:
39
  return HuggingFaceEmbeddings(
40
  model_name=model_name,
41
  encode_kwargs={"normalize_embeddings": True},
@@ -43,7 +43,7 @@ def embedding_function(model_name: str) -> HuggingFaceEmbeddings:
43
  )
44
 
45
 
46
- def vectordb(
47
  faq_id: str, embedding_function: Embeddings, documents: List[Document] = None
48
  ) -> VectorStore:
49
  vectordb = None
@@ -63,7 +63,23 @@ def vectordb(
63
 
64
 
65
  def similarity_search(
66
- vectordb: VectorStore, query: str, k: int
67
  ) -> List[Tuple[Document, float]]:
68
  os.environ["TOKENIZERS_PARALLELISM"] = "true"
69
  return vectordb.similarity_search_with_relevance_scores(query=query, k=k)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  return loader.load()
36
 
37
 
38
+ def define_embedding_function(model_name: str) -> HuggingFaceEmbeddings:
39
  return HuggingFaceEmbeddings(
40
  model_name=model_name,
41
  encode_kwargs={"normalize_embeddings": True},
 
43
  )
44
 
45
 
46
+ def get_vectordb(
47
  faq_id: str, embedding_function: Embeddings, documents: List[Document] = None
48
  ) -> VectorStore:
49
  vectordb = None
 
63
 
64
 
def similarity_search(
    vectordb: VectorStore, query: str, k: int = 3
) -> List[Tuple[Document, float]]:
    """Query ``vectordb`` for the ``k`` entries most similar to ``query``.

    Args:
        vectordb: Store to search.
        query: Free-text query string.
        k: Number of results to request (defaults to 3).

    Returns:
        Whatever ``similarity_search_with_relevance_scores`` yields for the
        query: a list of ``(document, score)`` pairs.
    """
    # Set on every call, before the store embeds the query.
    os.environ["TOKENIZERS_PARALLELISM"] = "true"
    scored_matches = vectordb.similarity_search_with_relevance_scores(
        query=query,
        k=k,
    )
    return scored_matches
70
+
71
+
def load_vectordb_id(
    faq_id: str,
    page_content_column: str,
    model_name: str = "sentence-transformers/all-mpnet-base-v2",
) -> VectorStore:
    """Return the vector store for ``faq_id``, building it from the sheet if needed.

    First asks ``get_vectordb`` for the store without supplying documents.
    If that call raises, the sheet is read via ``read_df(xlsx_url(faq_id))``,
    turned into documents with ``create_documents`` and ``get_vectordb`` is
    called again with those documents.

    Args:
        faq_id: Identifier of the FAQ sheet / vector store.
        page_content_column: Column passed to ``create_documents`` as the
            page-content column.
        model_name: Embedding model handed to ``define_embedding_function``.
            Defaults to the model that was previously hard-coded here.

    Returns:
        The store returned by ``get_vectordb``.

    Raises:
        Any exception raised while reading the sheet, creating documents, or
        by the second ``get_vectordb`` call propagates to the caller.
    """
    # Local import: the file's top-level import block is outside this view.
    import logging

    embedding_function = define_embedding_function(model_name)
    try:
        return get_vectordb(faq_id=faq_id, embedding_function=embedding_function)
    except Exception:
        # NOTE(review): presumably this fails when no store exists yet for
        # faq_id -- confirm against get_vectordb and narrow the exception type.
        # The catch stays broad to preserve the fallback, but the failure is
        # recorded instead of being dropped silently.
        logging.getLogger(__name__).info(
            "get_vectordb failed for faq_id=%s; rebuilding from sheet",
            faq_id,
            exc_info=True,
        )

    df = read_df(xlsx_url(faq_id))
    documents = create_documents(df, page_content_column)
    return get_vectordb(
        faq_id=faq_id,
        embedding_function=embedding_function,
        documents=documents,
    )
82
+
83
+
def load_vectordb(sheet_url: str, page_content_column: str) -> VectorStore:
    """Load the vector store for the sheet at ``sheet_url``.

    Converts ``sheet_url`` to an id with ``faq_id`` and delegates to
    ``load_vectordb_id`` with that id and ``page_content_column``.
    """
    sheet_faq_id = faq_id(sheet_url)
    return load_vectordb_id(sheet_faq_id, page_content_column)
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
  openpyxl
2
  langchain
3
  sentence_transformers
4
- awadb
 
 
 
1
  openpyxl
2
  langchain
3
  sentence_transformers
4
+ awadb
5
+ fastapi
6
+ uvicorn