Rahatara committed on
Commit
5adf43d
·
verified ·
1 Parent(s): 0037eaa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +152 -84
app.py CHANGED
@@ -11,36 +11,23 @@ import os
11
  import re
12
  import openai
13
 
14
- openai.api_key = "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"
15
-
16
- def add_text(history: List[Tuple[str, str]], text: str) -> List[Tuple[str, str]]:
17
- if not text:
18
- raise gr.Error("Enter text")
19
- history.append((text, ""))
20
- return history
21
-
22
  class MyApp:
23
  def __init__(self) -> None:
24
- self.OPENAI_API_KEY: str = openai.api_key
25
  self.chain = None
26
  self.chat_history: list = []
27
  self.documents = None
28
  self.file_name = None
29
 
30
- def __call__(self, file: str) -> ConversationalRetrievalChain:
31
- if self.chain is None:
32
- self.chain = self.build_chain(file)
33
- return self.chain
34
 
35
  def process_file(self, file) -> Image.Image:
36
  loader = PyMuPDFLoader(file.name)
37
  self.documents = loader.load()
38
- pattern = r"/([^/]+)$"
39
- match = re.search(pattern, file.name)
40
- try:
41
- self.file_name = match.group(1)
42
- except:
43
- self.file_name = os.path.basename(file)
44
  doc = fitz.open(file.name)
45
  page = doc[0]
46
  pix = page.get_pixmap(dpi=150)
@@ -61,12 +48,41 @@ class MyApp:
61
  )
62
  return "Vector database built successfully!"
63
 
64
- def get_response(history, query, file):
65
- if not file:
66
- raise gr.Error(message="Upload a PDF")
67
- chain = app(file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  try:
69
- result = chain.invoke(
70
  {"question": query, "chat_history": app.chat_history}
71
  )
72
  app.chat_history.append((query, result["answer"]))
@@ -75,13 +91,13 @@ def get_response(history, query, file):
75
  for doc in source_docs:
76
  source_texts.append(f"Page {doc.metadata['page'] + 1}: {doc.page_content}")
77
  source_texts_str = "\n\n".join(source_texts)
78
- for char in result["answer"]:
79
- history[-1][-1] += char
80
  return history, source_texts_str
81
- except Exception:
82
  app.chat_history.append((query, "I have no information about it. Feed me knowledge, please!"))
83
- return history, "I have no information about it. Feed me knowledge, please!"
84
 
 
85
  def render_file(file) -> Image.Image:
86
  doc = fitz.open(file.name)
87
  page = doc[0]
@@ -89,77 +105,129 @@ def render_file(file) -> Image.Image:
89
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
90
  return image
91
 
92
- def purge_chat_and_render_first(file) -> Tuple[Image.Image, list]:
 
93
  app.chat_history = []
94
  doc = fitz.open(file.name)
95
  page = doc[0]
96
  pix = page.get_pixmap(dpi=150)
97
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
98
- return image, []
99
 
 
100
  def refresh_chat():
101
  app.chat_history = []
102
  return []
103
 
104
  app = MyApp()
105
 
 
 
 
 
 
 
 
 
 
 
 
106
  with gr.Blocks() as demo:
107
- with gr.Tab("Step 1: Upload PDF"):
108
- btn = gr.UploadButton("📁 Upload a PDF", file_types=[".pdf"])
109
- show_img = gr.Image(label="Uploaded PDF")
110
-
111
- with gr.Tab("Step 2: Process File"):
112
- process_btn = gr.Button("Process PDF")
113
- show_img_processed = gr.Image(label="Processed PDF")
114
- process_status = gr.Textbox(label="Processing Status", interactive=False)
115
-
116
- with gr.Tab("Step 3: Build Vector Database"):
117
- build_vector_btn = gr.Button("Build Vector Database")
118
- status_text = gr.Textbox(label="Status", value="", interactive=False)
119
-
120
- with gr.Tab("Step 4: Ask Questions"):
121
- chatbot = gr.Chatbot(elem_id="chatbot")
122
- txt = gr.Textbox(
123
- show_label=False,
124
- placeholder="Enter text and press submit",
125
- scale=2
126
- )
127
- submit_btn = gr.Button("Submit", scale=1)
128
- refresh_btn = gr.Button("Refresh Chat", scale=1)
129
- source_texts_output = gr.Textbox(label="Source Texts", interactive=False)
130
-
131
- btn.upload(
132
- fn=purge_chat_and_render_first,
133
- inputs=[btn],
134
- outputs=[show_img, chatbot],
135
- )
136
-
137
- process_btn.click(
138
- fn=lambda file: (app.process_file(file), "Processing complete!"),
139
- inputs=[btn],
140
- outputs=[show_img_processed, process_status],
141
- )
142
-
143
- build_vector_btn.click(
144
- fn=app.build_chain,
145
- inputs=[btn],
146
- outputs=[status_text],
147
- )
148
-
149
- submit_btn.click(
150
- fn=add_text,
151
- inputs=[chatbot, txt],
152
- outputs=[chatbot],
153
- queue=False,
154
- ).success(
155
- fn=get_response, inputs=[chatbot, txt, btn], outputs=[chatbot, source_texts_output]
156
  )
157
 
158
- refresh_btn.click(
159
- fn=refresh_chat,
160
- inputs=[],
161
- outputs=[chatbot],
162
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
  demo.queue()
165
  demo.launch()
 
11
  import re
12
  import openai
13
 
14
+ # MyApp class to handle the processes
 
 
 
 
 
 
 
15
  class MyApp:
16
  def __init__(self) -> None:
17
+ self.OPENAI_API_KEY: str = None # Initialize with None
18
  self.chain = None
19
  self.chat_history: list = []
20
  self.documents = None
21
  self.file_name = None
22
 
23
+ def set_api_key(self, api_key: str):
24
+ self.OPENAI_API_KEY = api_key
25
+ openai.api_key = api_key
 
26
 
27
  def process_file(self, file) -> Image.Image:
28
  loader = PyMuPDFLoader(file.name)
29
  self.documents = loader.load()
30
+ self.file_name = os.path.basename(file.name)
 
 
 
 
 
31
  doc = fitz.open(file.name)
32
  page = doc[0]
33
  pix = page.get_pixmap(dpi=150)
 
48
  )
49
  return "Vector database built successfully!"
50
 
51
+ # Function to add text to chat history
52
+ def add_text(history: List[Tuple[str, str]], text: str) -> List[Tuple[str, str]]:
53
+ if not text:
54
+ raise gr.Error("Enter text")
55
+ history.append((text, ""))
56
+ return history
57
+
58
+ # Function to get response from the model
59
+ def get_response(history, query):
60
+ if app.chain is None:
61
+ raise gr.Error("The chain has not been built yet. Please ensure the vector database is built before querying.")
62
+
63
+ try:
64
+ result = app.chain.invoke(
65
+ {"question": query, "chat_history": app.chat_history}
66
+ )
67
+ app.chat_history.append((query, result["answer"]))
68
+ source_docs = result["source_documents"]
69
+ source_texts = []
70
+ for doc in source_docs:
71
+ source_texts.append(f"Page {doc.metadata['page'] + 1}: {doc.page_content}")
72
+ source_texts_str = "\n\n".join(source_texts)
73
+ history[-1] = (history[-1][0], result["answer"])
74
+ return history, source_texts_str
75
+ except Exception as e:
76
+ app.chat_history.append((query, "I have no information about it. Feed me knowledge, please!"))
77
+ return history, f"I have no information about it. Feed me knowledge, please! Error: {str(e)}"
78
+
79
+ # Function to get response for the current RAG tab
80
+ def get_response_current(history, query):
81
+ if app.chain is None:
82
+ raise gr.Error("The chain has not been built yet. Please ensure the vector database is built before querying.")
83
+
84
  try:
85
+ result = app.chain.invoke(
86
  {"question": query, "chat_history": app.chat_history}
87
  )
88
  app.chat_history.append((query, result["answer"]))
 
91
  for doc in source_docs:
92
  source_texts.append(f"Page {doc.metadata['page'] + 1}: {doc.page_content}")
93
  source_texts_str = "\n\n".join(source_texts)
94
+ history[-1] = (history[-1][0], result["answer"])
 
95
  return history, source_texts_str
96
+ except Exception as e:
97
  app.chat_history.append((query, "I have no information about it. Feed me knowledge, please!"))
98
+ return history, f"I have no information about it. Feed me knowledge, please! Error: {str(e)}"
99
 
100
+ # Function to render file
101
  def render_file(file) -> Image.Image:
102
  doc = fitz.open(file.name)
103
  page = doc[0]
 
105
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
106
  return image
107
 
108
+ # Function to purge chat and render first page of PDF
109
+ def purge_chat_and_render_first(file) -> Image.Image:
110
  app.chat_history = []
111
  doc = fitz.open(file.name)
112
  page = doc[0]
113
  pix = page.get_pixmap(dpi=150)
114
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
115
+ return image
116
 
117
+ # Function to refresh chat
118
  def refresh_chat():
119
  app.chat_history = []
120
  return []
121
 
122
  app = MyApp()
123
 
124
+ # Function to set API key
125
+ def set_api_key(api_key):
126
+ app.set_api_key(api_key)
127
+ # Pre-process the saved PDF file after setting the API key
128
+ saved_file_path = "track_training.pdf"
129
+ with open(saved_file_path, 'rb') as saved_file:
130
+ app.process_file(saved_file)
131
+ app.build_chain(saved_file)
132
+ return f"API Key set to {api_key[:4]}...{api_key[-4:]} and vector database built successfully!"
133
+
134
+ # Gradio interface
135
  with gr.Blocks() as demo:
136
+ api_key_input = gr.Textbox(label="OpenAI API Key", type="password", placeholder="Enter your OpenAI API Key")
137
+ api_key_btn = gr.Button("Set API Key")
138
+ api_key_status = gr.Textbox(value="API Key status", interactive=False)
139
+
140
+ api_key_btn.click(
141
+ fn=set_api_key,
142
+ inputs=[api_key_input],
143
+ outputs=[api_key_status]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  )
145
 
146
+ with gr.Tab("Inst RAG"):
147
+ with gr.Column():
148
+ with gr.Tab("Upload PDF"):
149
+ btn = gr.UploadButton("📁 Upload a PDF", file_types=[".pdf"])
150
+ show_img = gr.Image(label="Uploaded PDF")
151
+
152
+ btn.upload(
153
+ fn=purge_chat_and_render_first,
154
+ inputs=[btn],
155
+ outputs=[show_img],
156
+ )
157
+
158
+ with gr.Tab("Process PDF"):
159
+ process_btn = gr.Button("Process PDF")
160
+ show_img_processed = gr.Image(label="Processed PDF")
161
+ process_status = gr.Textbox(label="Processing Status", interactive=False)
162
+
163
+ process_btn.click(
164
+ fn=lambda file: (app.process_file(file), "Processing complete!"),
165
+ inputs=[btn],
166
+ outputs=[show_img_processed, process_status],
167
+ )
168
+
169
+ with gr.Tab("Build Vector Database"):
170
+ build_vector_btn = gr.Button("Build Vector Database")
171
+ status_text = gr.Textbox(label="Status", value="", interactive=False)
172
+
173
+ build_vector_btn.click(
174
+ fn=app.build_chain,
175
+ inputs=[btn],
176
+ outputs=[status_text],
177
+ )
178
+
179
+ with gr.Tab("Chat"):
180
+ chatbot = gr.Chatbot(elem_id="chatbot")
181
+ txt = gr.Textbox(
182
+ show_label=False,
183
+ placeholder="Enter text and press submit",
184
+ scale=2
185
+ )
186
+ submit_btn = gr.Button("Submit", scale=1)
187
+ refresh_btn = gr.Button("Refresh Chat", scale=1)
188
+ source_texts_output = gr.Textbox(label="Source Texts", interactive=False)
189
+
190
+ submit_btn.click(
191
+ fn=add_text,
192
+ inputs=[chatbot, txt],
193
+ outputs=[chatbot],
194
+ queue=False,
195
+ ).success(
196
+ fn=get_response, inputs=[chatbot, txt], outputs=[chatbot, source_texts_output]
197
+ )
198
+
199
+ refresh_btn.click(
200
+ fn=refresh_chat,
201
+ inputs=[],
202
+ outputs=[chatbot],
203
+ )
204
+
205
+ with gr.Tab("Current RAG"):
206
+ with gr.Column():
207
+ chatbot_current = gr.Chatbot(elem_id="chatbot_current")
208
+ txt_current = gr.Textbox(
209
+ show_label=False,
210
+ placeholder="Enter text and press submit",
211
+ scale=2
212
+ )
213
+ submit_btn_current = gr.Button("Submit", scale=1)
214
+ refresh_btn_current = gr.Button("Refresh Chat", scale=1)
215
+ source_texts_output_current = gr.Textbox(label="Source Texts", interactive=False)
216
+
217
+ submit_btn_current.click(
218
+ fn=add_text,
219
+ inputs=[chatbot_current, txt_current],
220
+ outputs=[chatbot_current],
221
+ queue=False,
222
+ ).success(
223
+ fn=get_response_current, inputs=[chatbot_current, txt_current], outputs=[chatbot_current, source_texts_output_current]
224
+ )
225
+
226
+ refresh_btn_current.click(
227
+ fn=refresh_chat,
228
+ inputs=[],
229
+ outputs=[chatbot_current],
230
+ )
231
 
232
  demo.queue()
233
  demo.launch()