andreasmartin committed on
Commit
6323bc8
·
1 Parent(s): 41a8232

deepnote update

Browse files
Files changed (2) hide show
  1. app.py +26 -14
  2. faq.py +30 -5
app.py CHANGED
@@ -7,7 +7,7 @@ import gradio as gr
7
  app = FastAPI()
8
 
9
 
10
- class Request(BaseModel):
11
  question: str
12
  sheet_url: str
13
  page_content_column: str
@@ -15,31 +15,43 @@ class Request(BaseModel):
15
 
16
 
17
  @app.post("/api/v1/ask")
18
- async def ask_api(request: Request):
19
  return ask(
20
  request.sheet_url, request.page_content_column, request.k, request.question
21
  )
22
 
23
 
 
 
 
 
 
24
  def ask(sheet_url: str, page_content_column: str, k: int, question: str):
25
  vectordb = faq.load_vectordb(sheet_url, page_content_column)
26
  result = faq.similarity_search(vectordb, question, k=k)
27
  return result
28
 
29
 
30
- iface = gr.Interface(
31
- fn=ask,
32
- inputs=[
33
- gr.Textbox(label="Google Sheet URL"),
34
- gr.Textbox(label="Question Column"),
35
- gr.Slider(1, 5, step=1, label="K"),
36
- gr.Textbox(label="Question"),
37
- ],
38
- outputs=[gr.JSON(label="Answer")],
39
- allow_flagging="never",
40
- )
 
 
 
 
 
 
 
41
 
42
- app = gr.mount_gradio_app(app, iface, path="/")
43
 
44
 
45
  if __name__ == "__main__":
 
7
  app = FastAPI()
8
 
9
 
10
+ class AskRequest(BaseModel):
11
  question: str
12
  sheet_url: str
13
  page_content_column: str
 
15
 
16
 
@app.post("/api/v1/ask")
async def ask_api(request: AskRequest):
    """Handle POST /api/v1/ask by forwarding the request fields to ask().

    Returns whatever ask() returns for the submitted sheet URL, page-content
    column, k and question.
    """
    # Keyword arguments make the mapping onto ask()'s parameters explicit.
    return ask(
        sheet_url=request.sheet_url,
        page_content_column=request.page_content_column,
        k=request.k,
        question=request.question,
    )
22
 
23
 
@app.delete("/api/v1/")
async def delete_vectordb_api():
    """Handle DELETE /api/v1/ by delegating to delete_vectordb()."""
    outcome = delete_vectordb()
    return outcome
27
+
28
+
def ask(sheet_url: str, page_content_column: str, k: int, question: str):
    """Run a similarity search for ``question`` against the given sheet.

    The vector store is obtained from ``faq.load_vectordb`` using
    ``sheet_url`` and ``page_content_column``; the value returned is
    whatever ``faq.similarity_search`` yields for ``question`` with ``k=k``.
    """
    return faq.similarity_search(
        faq.load_vectordb(sheet_url, page_content_column),
        question,
        k=k,
    )
33
 
34
 
def delete_vectordb():
    """Delegate to ``faq.delete_vectordb()``; always returns ``None``."""
    faq.delete_vectordb()
    return None
37
+
38
+
# Gradio front end. Components are created in the same order as before:
# the four inputs, the "Ask" button, the JSON output, then the delete button.
with gr.Blocks() as block:
    sheet_url = gr.Textbox(label="Google Sheet URL")
    page_content_column = gr.Textbox(label="Question Column")
    k = gr.Slider(minimum=2, maximum=5, step=1, label="K")
    question = gr.Textbox(label="Question")
    ask_button = gr.Button(value="Ask")
    answer_output = gr.JSON(label="Answer")
    delete_button = gr.Button(value="Delete Vector DB")

    # "Ask" calls ask() with the four inputs and shows the result as JSON.
    ask_button.click(
        fn=ask,
        inputs=[sheet_url, page_content_column, k, question],
        outputs=answer_output,
    )
    # The delete button takes no inputs and updates no outputs.
    delete_button.click(fn=delete_vectordb)

# Mount the Gradio UI at the root path of the FastAPI application.
app = gr.mount_gradio_app(app, block, path="/")
55
 
56
 
57
  if __name__ == "__main__":
faq.py CHANGED
@@ -7,12 +7,14 @@ from langchain.docstore.document import Document
7
  from langchain.embeddings.base import Embeddings
8
  from langchain.vectorstores.base import VectorStore
9
  import os
 
10
 
11
  SHEET_URL_X = "https://docs.google.com/spreadsheets/d/"
12
  SHEET_URL_Y = "/edit#gid="
13
  SHEET_URL_Y_EXPORT = "/export?gid="
14
  CACHE_FOLDER = ".embedding-model"
15
  VECTORDB_FOLDER = ".vectordb"
 
16
 
17
 
18
  def faq_id(sheet_url: str) -> str:
@@ -69,17 +71,40 @@ def similarity_search(
69
  return vectordb.similarity_search_with_relevance_scores(query=query, k=k)
70
 
71
 
72
- def load_vectordb_id(faq_id: str, page_content_column: str) -> VectorStore:
73
- embedding_function = define_embedding_function("sentence-transformers/all-mpnet-base-v2")
 
 
 
 
74
  vectordb = None
75
  try:
76
  vectordb = get_vectordb(faq_id=faq_id, embedding_function=embedding_function)
77
  except Exception as e:
78
- df = read_df(xlsx_url(faq_id))
79
- documents = create_documents(df, page_content_column)
80
- vectordb = get_vectordb(faq_id=faq_id, embedding_function=embedding_function, documents=documents)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  return vectordb
82
 
83
 
84
  def load_vectordb(sheet_url: str, page_content_column: str) -> VectorStore:
85
  return load_vectordb_id(faq_id(sheet_url), page_content_column)
 
 
 
 
 
7
  from langchain.embeddings.base import Embeddings
8
  from langchain.vectorstores.base import VectorStore
9
  import os
10
+ import shutil
11
 
12
  SHEET_URL_X = "https://docs.google.com/spreadsheets/d/"
13
  SHEET_URL_Y = "/edit#gid="
14
  SHEET_URL_Y_EXPORT = "/export?gid="
15
  CACHE_FOLDER = ".embedding-model"
16
  VECTORDB_FOLDER = ".vectordb"
17
+ EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
18
 
19
 
20
  def faq_id(sheet_url: str) -> str:
 
71
  return vectordb.similarity_search_with_relevance_scores(query=query, k=k)
72
 
73
 
def load_vectordb_id(
    faq_id: str,
    page_content_column: str,
    embedding_function_name: str = EMBEDDING_MODEL,
) -> VectorStore:
    """Return the vector store for ``faq_id``, creating it if needed.

    The embedding function is built from ``embedding_function_name``.
    ``get_vectordb`` is first called without documents; if that call raises,
    the store is built through ``create_vectordb_id`` with the same
    embedding function.

    Args:
        faq_id: Identifier of the FAQ sheet.
        page_content_column: Column name forwarded to ``create_vectordb_id``
            when the store has to be created.
        embedding_function_name: Name passed to ``define_embedding_function``.

    Returns:
        The vector store from ``get_vectordb`` or ``create_vectordb_id``.
    """
    embedding_function = define_embedding_function(embedding_function_name)
    try:
        return get_vectordb(faq_id=faq_id, embedding_function=embedding_function)
    except Exception:
        # Deliberately broad best-effort fallback: any failure of the first
        # get_vectordb call leads to (re)creating the store from the sheet.
        # NOTE(review): this also hides unrelated errors; narrow the
        # exception type once the failure raised by get_vectordb is known.
        return create_vectordb_id(faq_id, page_content_column, embedding_function)
87
+
88
+
def create_vectordb_id(
    faq_id: str,
    page_content_column: str,
    embedding_function: HuggingFaceEmbeddings = None,
) -> VectorStore:
    """Build the vector store for ``faq_id`` from its sheet.

    Args:
        faq_id: Identifier of the FAQ sheet; used to derive the URL that is
            read via ``read_df(xlsx_url(faq_id))``.
        page_content_column: Column name passed to ``create_documents``.
        embedding_function: Embedding function to use. When ``None``, one is
            created with ``define_embedding_function(EMBEDDING_MODEL)``.

    Returns:
        The result of ``get_vectordb`` called with the created documents.
    """
    embeddings = (
        define_embedding_function(EMBEDDING_MODEL)
        if embedding_function is None
        else embedding_function
    )
    sheet_frame = read_df(xlsx_url(faq_id))
    return get_vectordb(
        faq_id=faq_id,
        embedding_function=embeddings,
        documents=create_documents(sheet_frame, page_content_column),
    )
103
 
104
 
def load_vectordb(sheet_url: str, page_content_column: str) -> VectorStore:
    """Resolve ``sheet_url`` with ``faq_id`` and delegate to ``load_vectordb_id``."""
    sheet_id = faq_id(sheet_url)
    return load_vectordb_id(sheet_id, page_content_column)
107
+
108
+
def delete_vectordb():
    """Remove the ``VECTORDB_FOLDER`` directory tree, ignoring any errors."""
    shutil.rmtree(path=VECTORDB_FOLDER, ignore_errors=True)