from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
import os

import gen_splits

GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

# Choose an embedding backend. Only one should be active at a time; the
# commented lines show the Google GenAI and Ollama alternatives.
# embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", task_type="retrieval_query", google_api_key=GEMINI_API_KEY)
# embedding_model = OllamaEmbeddings(model="mxbai-embed-large")
embedding_model = OpenAIEmbeddings(api_key=OPENAI_API_KEY, model="text-embedding-3-small")

# Create embeddings for searching the splits and persist them in a local
# Chroma vector store.
persist_directory = './chroma/'


def initialize():
    """Embed the document splits and build a persistent Chroma vector store."""
    splits = gen_splits.gen_splits()
    vectordb = Chroma.from_documents(
        documents=splits,
        persist_directory=persist_directory,
        embedding=embedding_model,
    )
    # Write the store to disk (recent chromadb versions also persist
    # automatically when a persist_directory is set).
    vectordb.persist()
    return vectordb


if __name__ == "__main__":
    vectordb = initialize()
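

# A minimal, hedged usage sketch (not part of the original script): how a
# retrieval step might reload the persisted store and query it. It reuses the
# module-level embedding_model and persist_directory above and assumes the
# same embedding model is used at index and query time; the default query
# string and k value are hypothetical placeholders.
def example_query(query: str = "example question about the indexed documents", k: int = 4):
    # Re-open the persisted Chroma collection with the same embedding function.
    vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_model)
    # Return the k splits most similar to the query.
    return vectordb.similarity_search(query, k=k)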