File size: 1,070 Bytes
9091381
2f796ab
9091381
ff2a712
78c6667
4fafa59
4cc4dc7
75a763d
4fafa59
78c6667
e391b31
ecef4cf
9091381
 
ecef4cf
 
61c450e
4fafa59
61c450e
9091381
 
 
78c6667
2756981
5a0035a
c8b9e42
 
bff30fa
2756981
53d161a
c8b9e42
53d161a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
import os
import gen_splits
from langchain_ollama import OllamaEmbeddings
import ollama
import chromadb


# API credentials are read from the environment; an unset variable yields None.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Active embedding backend: OpenAI "text-embedding-3-small".
embedding_model = OpenAIEmbeddings(
    api_key=OPENAI_API_KEY,
    model="text-embedding-3-small",
)

# Alternative backends (replace the assignment above with one of these):
#   Google GenAI text embeddings:
#     embedding_model = GoogleGenerativeAIEmbeddings(
#         model="models/embedding-001",
#         task_type="retrieval_query",
#         google_api_key=GEMINI_API_KEY,
#     )
#   Ollama embeddings:
#     embedding_model = OllamaEmbeddings(model="mxbai-embed-large")

# Directory handed to Chroma as the persist location for the vector store.
persist_directory = "./chroma/"

def initialize():
    """Build the Chroma vector store from the generated document splits.

    The splits come from ``gen_splits.gen_splits()`` and are embedded with the
    module-level ``embedding_model``. The store is created with
    ``persist_directory`` as its persist location, and ``persist()`` is then
    called on it.

    Returns:
        The Chroma vector store created by ``Chroma.from_documents``.
    """
    store = Chroma.from_documents(
        documents=gen_splits.gen_splits(),
        persist_directory=persist_directory,
        embedding=embedding_model,
    )
    # NOTE(review): newer Chroma releases are said to persist automatically;
    # confirm whether this explicit call is still required for the pinned version.
    store.persist()
    return store



# Script entry point: when this file is executed directly (not imported),
# build the vector store and bind it to the module-level name `vectordb`.
if __name__ == "__main__":
    vectordb = initialize()