Spaces:
Sleeping
Sleeping
File size: 1,070 Bytes
9091381 2f796ab 9091381 ff2a712 78c6667 4fafa59 4cc4dc7 75a763d 4fafa59 78c6667 e391b31 ecef4cf 9091381 ecef4cf 61c450e 4fafa59 61c450e 9091381 78c6667 2756981 5a0035a c8b9e42 bff30fa 2756981 53d161a c8b9e42 53d161a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
import os
import gen_splits
from langchain_ollama import OllamaEmbeddings
import ollama
import chromadb
# API credentials are read from the environment; each is None when unset.
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# Embedding backend used when building the vector store.
# OpenAI is the active choice; the Google GenAI and Ollama variants are kept
# below as commented-out alternatives.
# embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001", task_type="retrieval_query", google_api_key=GEMINI_API_KEY)
# embedding_model = OllamaEmbeddings(model="mxbai-embed-large")
embedding_model = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=OPENAI_API_KEY,
)

# Directory handed to Chroma as its on-disk persistence location.
persist_directory = './chroma/'
def initialize():
    """Build the Chroma vector store from the generated document splits.

    The splits come from ``gen_splits.gen_splits()`` and are embedded with the
    module-level ``embedding_model``; ``persist_directory`` is passed to
    Chroma as the persistence location.

    Returns:
        The object returned by ``Chroma.from_documents``.
    """
    documents = gen_splits.gen_splits()
    store = Chroma.from_documents(
        documents=documents,
        embedding=embedding_model,
        persist_directory=persist_directory,
    )
    # NOTE(review): recent Chroma releases are reported to persist
    # automatically -- confirm whether this explicit call is still required.
    store.persist()
    return store
if __name__ == "__main__":
    # Running this file as a script builds the vector store once; importing
    # the module does not trigger the build.
    vectordb = initialize()
|