andreasmartin committed on
Commit
8c2f0ba
·
1 Parent(s): 96db48f

deepnote update

Browse files
Files changed (1) hide show
  1. faq.py +17 -20
faq.py CHANGED
@@ -8,22 +8,22 @@ from langchain.embeddings.base import Embeddings
8
  from langchain.vectorstores.base import VectorStore
9
  import os
10
 
11
- sheet_url_x = "https://docs.google.com/spreadsheets/d/"
12
- sheet_url_y = "/edit#gid="
13
- sheet_url_y_exp = "/export?gid="
14
- cache_folder=".embedding-model"
15
- dir_vectordb = ".vectordb"
16
 
17
 
18
  def faq_id(sheet_url: str) -> str:
19
- x = sheet_url.find(sheet_url_x)
20
- y = sheet_url.find(sheet_url_y)
21
- return sheet_url[x + len(sheet_url_x) : y] + "-" + sheet_url[y + len(sheet_url_y) :]
22
 
23
 
24
  def xlsx_url(faq_id: str) -> str:
25
  y = faq_id.rfind("-")
26
- return sheet_url_x + faq_id[0:y] + sheet_url_y_exp + faq_id[y + 1 :]
27
 
28
 
29
  def read_df(xlsx_url: str) -> pd.DataFrame:
@@ -39,21 +39,16 @@ def embedding_function(model_name: str) -> HuggingFaceEmbeddings:
39
  return HuggingFaceEmbeddings(
40
  model_name=model_name,
41
  encode_kwargs={"normalize_embeddings": True},
42
- cache_folder=cache_folder
43
  )
44
 
45
 
46
  def vectordb(
47
- faq_id: str,
48
- embedding_function: Embeddings,
49
- documents: List[Document] = None
50
  ) -> VectorStore:
51
  vectordb = None
52
  if documents is None:
53
- vectordb = AwaDB(
54
- embedding=embedding_function,
55
- log_and_data_dir=dir_vectordb
56
- )
57
  success = vectordb.load_local(table_name=faq_id)
58
  if not success:
59
  raise Exception("faq_id may not exists")
@@ -62,11 +57,13 @@ def vectordb(
62
  documents=documents,
63
  embedding=embedding_function,
64
  table_name=faq_id,
65
- log_and_data_dir=dir_vectordb
66
  )
67
  return vectordb
68
 
69
 
70
- def similarity_search(vectordb: VectorStore, query: str, k: int) -> List[Tuple[Document, float]]:
 
 
71
  os.environ["TOKENIZERS_PARALLELISM"] = "true"
72
- return vectordb.similarity_search_with_relevance_scores(query=query, k=k)
 
8
  from langchain.vectorstores.base import VectorStore
9
  import os
10
 
11
+ SHEET_URL_X = "https://docs.google.com/spreadsheets/d/"
12
+ SHEET_URL_Y = "/edit#gid="
13
+ SHEET_URL_Y_EXPORT = "/export?gid="
14
+ CACHE_FOLDER = ".embedding-model"
15
+ VECTORDB_FOLDER = ".vectordb"
16
 
17
 
18
  def faq_id(sheet_url: str) -> str:
19
+ x = sheet_url.find(SHEET_URL_X)
20
+ y = sheet_url.find(SHEET_URL_Y)
21
+ return sheet_url[x + len(SHEET_URL_X) : y] + "-" + sheet_url[y + len(SHEET_URL_Y) :]
22
 
23
 
24
  def xlsx_url(faq_id: str) -> str:
25
  y = faq_id.rfind("-")
26
+ return SHEET_URL_X + faq_id[0:y] + SHEET_URL_Y_EXPORT + faq_id[y + 1 :]
27
 
28
 
29
  def read_df(xlsx_url: str) -> pd.DataFrame:
 
39
  return HuggingFaceEmbeddings(
40
  model_name=model_name,
41
  encode_kwargs={"normalize_embeddings": True},
42
+ cache_folder=CACHE_FOLDER,
43
  )
44
 
45
 
46
  def vectordb(
47
+ faq_id: str, embedding_function: Embeddings, documents: List[Document] = None
 
 
48
  ) -> VectorStore:
49
  vectordb = None
50
  if documents is None:
51
+ vectordb = AwaDB(embedding=embedding_function, log_and_data_dir=VECTORDB_FOLDER)
 
 
 
52
  success = vectordb.load_local(table_name=faq_id)
53
  if not success:
54
  raise Exception("faq_id may not exists")
 
57
  documents=documents,
58
  embedding=embedding_function,
59
  table_name=faq_id,
60
+ log_and_data_dir=VECTORDB_FOLDER,
61
  )
62
  return vectordb
63
 
64
 
65
+ def similarity_search(
66
+ vectordb: VectorStore, query: str, k: int
67
+ ) -> List[Tuple[Document, float]]:
68
  os.environ["TOKENIZERS_PARALLELISM"] = "true"
69
+ return vectordb.similarity_search_with_relevance_scores(query=query, k=k)