XThomasBU committed on
Commit
c82efb6
·
1 Parent(s): e488f16

format changes

Browse files
code/app.py CHANGED
@@ -242,9 +242,9 @@ async def post_signin(request: Request):
242
  user_details.metadata["last_login"] = current_datetime
243
  # if new user, set the number of tries
244
  if "tokens_left" not in user_details.metadata:
245
- user_details.metadata["tokens_left"] = (
246
- TOKENS_LEFT # set the number of tokens left for the new user
247
- )
248
  if "all_time_tokens_allocated" not in user_details.metadata:
249
  user_details.metadata["all_time_tokens_allocated"] = ALL_TIME_TOKENS_ALLOCATED
250
  if "in_cooldown" not in user_details.metadata:
 
242
  user_details.metadata["last_login"] = current_datetime
243
  # if new user, set the number of tries
244
  if "tokens_left" not in user_details.metadata:
245
+ user_details.metadata[
246
+ "tokens_left"
247
+ ] = TOKENS_LEFT # set the number of tokens left for the new user
248
  if "all_time_tokens_allocated" not in user_details.metadata:
249
  user_details.metadata["all_time_tokens_allocated"] = ALL_TIME_TOKENS_ALLOCATED
250
  if "in_cooldown" not in user_details.metadata:
code/main.py CHANGED
@@ -505,7 +505,6 @@ class Chatbot:
505
  token_count += token_count_cb.total_tokens
506
 
507
  for question in list_of_questions:
508
-
509
  actions.append(
510
  cl.Action(
511
  name="follow up question",
@@ -549,7 +548,6 @@ class Chatbot:
549
 
550
  @cl.header_auth_callback
551
  def header_auth_callback(headers: dict) -> Optional[cl.User]:
552
-
553
  print("\n\n\nI am here\n\n\n")
554
  # try: # TODO: Add try-except block after testing
555
  # TODO: Implement to get the user information from the headers (not the cookie)
 
505
  token_count += token_count_cb.total_tokens
506
 
507
  for question in list_of_questions:
 
508
  actions.append(
509
  cl.Action(
510
  name="follow up question",
 
548
 
549
  @cl.header_auth_callback
550
  def header_auth_callback(headers: dict) -> Optional[cl.User]:
 
551
  print("\n\n\nI am here\n\n\n")
552
  # try: # TODO: Add try-except block after testing
553
  # TODO: Implement to get the user information from the headers (not the cookie)
code/modules/chat/helpers.py CHANGED
@@ -42,7 +42,6 @@ def get_sources(res, answer, stream=True, view_sources=False):
42
  full_answer += answer
43
 
44
  if view_sources:
45
-
46
  # Then, display the sources
47
  # check if the answer has sources
48
  if len(source_dict) == 0:
@@ -51,7 +50,6 @@ def get_sources(res, answer, stream=True, view_sources=False):
51
  else:
52
  full_answer += "\n\n**Sources:**\n"
53
  for idx, (url_name, source_data) in enumerate(source_dict.items()):
54
-
55
  full_answer += f"\nSource {idx + 1} (Score: {source_data['score']}): {source_data['url']}\n"
56
 
57
  name = f"Source {idx + 1} Text\n"
 
42
  full_answer += answer
43
 
44
  if view_sources:
 
45
  # Then, display the sources
46
  # check if the answer has sources
47
  if len(source_dict) == 0:
 
50
  else:
51
  full_answer += "\n\n**Sources:**\n"
52
  for idx, (url_name, source_data) in enumerate(source_dict.items()):
 
53
  full_answer += f"\nSource {idx + 1} (Score: {source_data['score']}): {source_data['url']}\n"
54
 
55
  name = f"Source {idx + 1} Text\n"
code/modules/chat/langchain/langchain_rag.py CHANGED
@@ -19,7 +19,6 @@ from .utils import (
19
 
20
 
21
  class Langchain_RAG_V1(BaseRAG):
22
-
23
  def __init__(
24
  self,
25
  llm,
 
19
 
20
 
21
  class Langchain_RAG_V1(BaseRAG):
 
22
  def __init__(
23
  self,
24
  llm,
code/modules/chat/langchain/utils.py CHANGED
@@ -26,7 +26,6 @@ CHAT_TURN_TYPE = Union[Tuple[str, str], BaseMessage]
26
 
27
 
28
  class CustomConversationalRetrievalChain(ConversationalRetrievalChain):
29
-
30
  def _get_chat_history(self, chat_history: List[CHAT_TURN_TYPE]) -> str:
31
  _ROLE_MAP = {"human": "Student: ", "ai": "AI Tutor: "}
32
  buffer = ""
@@ -139,7 +138,6 @@ class CustomConversationalRetrievalChain(ConversationalRetrievalChain):
139
 
140
 
141
  class CustomRunnableWithHistory(RunnableWithMessageHistory):
142
-
143
  def _get_chat_history(self, chat_history: List[CHAT_TURN_TYPE]) -> str:
144
  _ROLE_MAP = {"human": "Student: ", "ai": "AI Tutor: "}
145
  buffer = ""
@@ -282,7 +280,6 @@ def create_retrieval_chain(
282
 
283
  # TODO: Remove Hard-coded values
284
  async def return_questions(query, response, chat_history_str, context, config):
285
-
286
  system = (
287
  "You are someone that suggests a question based on the student's input and chat history. "
288
  "Generate a question that is relevant to the student's input and chat history. "
 
26
 
27
 
28
  class CustomConversationalRetrievalChain(ConversationalRetrievalChain):
 
29
  def _get_chat_history(self, chat_history: List[CHAT_TURN_TYPE]) -> str:
30
  _ROLE_MAP = {"human": "Student: ", "ai": "AI Tutor: "}
31
  buffer = ""
 
138
 
139
 
140
  class CustomRunnableWithHistory(RunnableWithMessageHistory):
 
141
  def _get_chat_history(self, chat_history: List[CHAT_TURN_TYPE]) -> str:
142
  _ROLE_MAP = {"human": "Student: ", "ai": "AI Tutor: "}
143
  buffer = ""
 
280
 
281
  # TODO: Remove Hard-coded values
282
  async def return_questions(query, response, chat_history_str, context, config):
 
283
  system = (
284
  "You are someone that suggests a question based on the student's input and chat history. "
285
  "Generate a question that is relevant to the student's input and chat history. "
code/modules/chat_processor/helpers.py CHANGED
@@ -156,7 +156,6 @@ async def update_user_info(user_info):
156
 
157
 
158
  async def check_user_cooldown(user_info, current_time):
159
-
160
  # # Check if no tokens left
161
  tokens_left = user_info.metadata.get("tokens_left", 0)
162
  if tokens_left > 0 and not user_info.metadata.get("in_cooldown", False):
@@ -214,7 +213,6 @@ async def reset_tokens_for_user(user_info):
214
 
215
  # Calculate how many tokens should have been regenerated proportionally
216
  if current_tokens < max_tokens:
217
-
218
  # Calculate the regeneration rate per second based on REGEN_TIME for full regeneration
219
  regeneration_rate_per_second = max_tokens / REGEN_TIME
220
 
 
156
 
157
 
158
  async def check_user_cooldown(user_info, current_time):
 
159
  # # Check if no tokens left
160
  tokens_left = user_info.metadata.get("tokens_left", 0)
161
  if tokens_left > 0 and not user_info.metadata.get("in_cooldown", False):
 
213
 
214
  # Calculate how many tokens should have been regenerated proportionally
215
  if current_tokens < max_tokens:
 
216
  # Calculate the regeneration rate per second based on REGEN_TIME for full regeneration
217
  regeneration_rate_per_second = max_tokens / REGEN_TIME
218
 
code/modules/config/project_config.yml CHANGED
@@ -3,5 +3,5 @@ retriever:
3
  RAGatouille: "XThomasBU/Colbert_Index"
4
 
5
  metadata:
6
- metada_links: ["https://dl4ds.github.io/sp2024/lectures/", "https://dl4ds.github.io/sp2024/schedule/"]
7
  slide_base_link: "https://dl4ds.github.io"
 
3
  RAGatouille: "XThomasBU/Colbert_Index"
4
 
5
  metadata:
6
+ metadata_links: ["https://dl4ds.github.io/sp2024/lectures/", "https://dl4ds.github.io/sp2024/schedule/"]
7
  slide_base_link: "https://dl4ds.github.io"
code/modules/dataloader/data_loader.py CHANGED
@@ -222,7 +222,7 @@ class ChunkProcessor:
222
 
223
  def chunk_docs(self, file_reader, uploaded_files, weblinks):
224
  addl_metadata = get_metadata(
225
- *self.config["metadata"]["metada_links"], self.config
226
  ) # For any additional metadata
227
 
228
  # remove already processed files if reparse_files is False
@@ -324,7 +324,6 @@ class ChunkProcessor:
324
  return
325
 
326
  try:
327
-
328
  if file_path in self.document_data:
329
  self.logger.warning(f"File {file_name} already processed")
330
  documents = [
@@ -440,13 +439,16 @@ if __name__ == "__main__":
440
 
441
  data_loader = DataLoader(config, logger=logger)
442
  # Just for testing
443
- document_chunks, document_names, documents, document_metadata = (
444
- data_loader.get_chunks(
445
- [
446
- "https://dl4ds.github.io/fa2024/static_files/discussion_slides/00_discussion.pdf"
447
- ],
448
- [],
449
- )
 
 
 
450
  )
451
 
452
  print(document_names[:5])
 
222
 
223
  def chunk_docs(self, file_reader, uploaded_files, weblinks):
224
  addl_metadata = get_metadata(
225
+ *self.config["metadata"]["metadata_links"], self.config
226
  ) # For any additional metadata
227
 
228
  # remove already processed files if reparse_files is False
 
324
  return
325
 
326
  try:
 
327
  if file_path in self.document_data:
328
  self.logger.warning(f"File {file_name} already processed")
329
  documents = [
 
439
 
440
  data_loader = DataLoader(config, logger=logger)
441
  # Just for testing
442
+ (
443
+ document_chunks,
444
+ document_names,
445
+ documents,
446
+ document_metadata,
447
+ ) = data_loader.get_chunks(
448
+ [
449
+ "https://dl4ds.github.io/fa2024/static_files/discussion_slides/00_discussion.pdf"
450
+ ],
451
+ [],
452
  )
453
 
454
  print(document_names[:5])
code/modules/retriever/helpers.py CHANGED
@@ -6,7 +6,6 @@ from typing import List
6
 
7
 
8
  class VectorStoreRetrieverScore(VectorStoreRetriever):
9
-
10
  # See https://github.com/langchain-ai/langchain/blob/61dd92f8215daef3d9cf1734b0d1f8c70c1571c3/libs/langchain/langchain/vectorstores/base.py#L500
11
  def _get_relevant_documents(
12
  self, query: str, *, run_manager: CallbackManagerForRetrieverRun
 
6
 
7
 
8
  class VectorStoreRetrieverScore(VectorStoreRetriever):
 
9
  # See https://github.com/langchain-ai/langchain/blob/61dd92f8215daef3d9cf1734b0d1f8c70c1571c3/libs/langchain/langchain/vectorstores/base.py#L500
10
  def _get_relevant_documents(
11
  self, query: str, *, run_manager: CallbackManagerForRetrieverRun
code/modules/vectorstore/store_manager.py CHANGED
@@ -47,7 +47,6 @@ class VectorStoreManager:
47
  return logger
48
 
49
  def load_files(self):
50
-
51
  files = os.listdir(self.config["vectorstore"]["data_path"])
52
  files = [
53
  os.path.join(self.config["vectorstore"]["data_path"], file)
@@ -69,7 +68,6 @@ class VectorStoreManager:
69
  return files, urls
70
 
71
  def create_embedding_model(self):
72
-
73
  self.logger.info("Creating embedding function")
74
  embedding_model_loader = EmbeddingModelLoader(self.config)
75
  embedding_model = embedding_model_loader.load_embedding_model()
@@ -100,7 +98,6 @@ class VectorStoreManager:
100
  )
101
 
102
  def create_database(self):
103
-
104
  start_time = time.time() # Start time for creating database
105
  data_loader = DataLoader(self.config, self.logger)
106
  self.logger.info("Loading data")
@@ -110,9 +107,12 @@ class VectorStoreManager:
110
  self.logger.info(f"Number of webpages: {len(webpages)}")
111
  if f"{self.config['vectorstore']['url_file_path']}" in files:
112
  files.remove(f"{self.config['vectorstores']['url_file_path']}") # cleanup
113
- document_chunks, document_names, documents, document_metadata = (
114
- data_loader.get_chunks(files, webpages)
115
- )
 
 
 
116
  num_documents = len(document_chunks)
117
  self.logger.info(f"Number of documents in the DB: {num_documents}")
118
  metadata_keys = list(document_metadata[0].keys()) if document_metadata else []
@@ -128,7 +128,6 @@ class VectorStoreManager:
128
  )
129
 
130
  def load_database(self):
131
-
132
  start_time = time.time() # Start time for loading database
133
  if self.config["vectorstore"]["db_option"] in ["FAISS", "Chroma", "RAPTOR"]:
134
  self.embedding_model = self.create_embedding_model()
 
47
  return logger
48
 
49
  def load_files(self):
 
50
  files = os.listdir(self.config["vectorstore"]["data_path"])
51
  files = [
52
  os.path.join(self.config["vectorstore"]["data_path"], file)
 
68
  return files, urls
69
 
70
  def create_embedding_model(self):
 
71
  self.logger.info("Creating embedding function")
72
  embedding_model_loader = EmbeddingModelLoader(self.config)
73
  embedding_model = embedding_model_loader.load_embedding_model()
 
98
  )
99
 
100
  def create_database(self):
 
101
  start_time = time.time() # Start time for creating database
102
  data_loader = DataLoader(self.config, self.logger)
103
  self.logger.info("Loading data")
 
107
  self.logger.info(f"Number of webpages: {len(webpages)}")
108
  if f"{self.config['vectorstore']['url_file_path']}" in files:
109
  files.remove(f"{self.config['vectorstores']['url_file_path']}") # cleanup
110
+ (
111
+ document_chunks,
112
+ document_names,
113
+ documents,
114
+ document_metadata,
115
+ ) = data_loader.get_chunks(files, webpages)
116
  num_documents = len(document_chunks)
117
  self.logger.info(f"Number of documents in the DB: {num_documents}")
118
  metadata_keys = list(document_metadata[0].keys()) if document_metadata else []
 
128
  )
129
 
130
  def load_database(self):
 
131
  start_time = time.time() # Start time for loading database
132
  if self.config["vectorstore"]["db_option"] in ["FAISS", "Chroma", "RAPTOR"]:
133
  self.embedding_model = self.create_embedding_model()