abhivsh committed on
Commit
2756981
·
verified ·
1 Parent(s): 60bfd8b

Update initialize.py

Browse files
Files changed (1) hide show
  1. initialize.py +35 -9
initialize.py CHANGED
@@ -22,23 +22,49 @@ embedding_model = OpenAIEmbeddings(api_key=OPENAI_API_KEY, model="text-embedding
22
  # Create Embeddings for Searching the Splits
23
  persist_directory = './chroma/'
24
 
25
- def initialize():
26
- # splits = gen_splits.gen_splits()
27
- # vectordb = Chroma.from_documents(documents=splits, persist_directory=persist_directory, embedding=embedding_model)
28
- # vectordb.persist()
 
 
 
 
 
 
 
 
 
 
 
 
29
 
 
 
30
  splits = gen_splits.gen_splits()
31
  client = chromadb.Client()
32
  collection = client.create_collection(name="docs")
33
- print(splits)
 
34
  # store each document in a vector embedding database
35
  for i, d in enumerate(splits):
36
- response = ollama.embeddings(model="mxbai-embed-large", prompt=d.page_content)
37
- embedding = response["embedding"]
38
- collection.add(ids=[str(i)],embeddings=[embedding], documents=[d])
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  return collection
41
-
42
 
43
  if __name__ == "__main__":
44
  vectordb = initialize()
 
22
  # Create Embeddings for Searching the Splits
23
  persist_directory = './chroma/'
24
 
25
+ # def initialize():
26
+ # # splits = gen_splits.gen_splits()
27
+ # # vectordb = Chroma.from_documents(documents=splits, persist_directory=persist_directory, embedding=embedding_model)
28
+ # # vectordb.persist()
29
+
30
+ # splits = gen_splits.gen_splits()
31
+ # client = chromadb.Client()
32
+ # collection = client.create_collection(name="docs")
33
+ # print(splits)
34
+ # # store each document in a vector embedding database
35
+ # for i, d in enumerate(splits):
36
+ # response = ollama.embeddings(model="mxbai-embed-large", prompt=d.page_content)
37
+ # embedding = response["embedding"]
38
+ # collection.add(ids=[str(i)],embeddings=[embedding], documents=[d])
39
+
40
+ # return collection
41
 
42
+
43
def initialize(max_attempts=5, retry_delay=2):
    """Embed every split with Ollama and store it in a Chroma collection.

    The splits come from ``gen_splits.gen_splits()``; each one is expected to
    expose its text as ``.page_content``. A collection named ``"docs"`` is
    created on a fresh ``chromadb.Client()`` and receives one entry per split,
    keyed by the split's position in the list.

    Args:
        max_attempts: How many times to try embedding/storing a single split
            before giving up on it when the connection fails.
        retry_delay: Seconds to wait between two consecutive attempts.

    Returns:
        The populated Chroma collection. Splits whose every attempt failed
        with ``httpx.ConnectError`` are reported on stdout and left out.
    """
    splits = gen_splits.gen_splits()
    client = chromadb.Client()
    collection = client.create_collection(name="docs")

    # store each document in a vector embedding database
    for i, d in enumerate(splits):
        for attempt in range(1, max_attempts + 1):
            try:
                response = ollama.embeddings(model="mxbai-embed-large", prompt=d.page_content)
                embedding = response["embedding"]
                # Store the text that was actually embedded: Chroma documents
                # are plain strings, not the split object itself.
                collection.add(ids=[str(i)], embeddings=[embedding], documents=[d.page_content])
                break
            except httpx.ConnectError as e:
                print(f"Connection failed (attempt {attempt}): {e}")
                # Only wait when another attempt will follow.
                if attempt < max_attempts:
                    time.sleep(retry_delay)
        else:
            # Every attempt failed: say so instead of silently dropping the split.
            print(f"Giving up on document {i} after {max_attempts} failed attempts; it was not added.")

    return collection
67
+
68
 
69
  if __name__ == "__main__":
70
  vectordb = initialize()