# Spaces: Sleeping  (web-page header residue captured with this file; not part of the program)
import os

import chromadb
import ollama
import requests  # Added import for requests
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import OllamaEmbeddings
from langchain_openai import OpenAIEmbeddings

import gen_splits
# Hugging Face API token read from the environment; None when the variable is unset.
HF_token = os.environ.get('HF_token')
# Embedding Function
def embed(texts, *, timeout=120.0):
    """Embed *texts* via the Hugging Face hosted feature-extraction pipeline.

    Args:
        texts: The input(s) sent as the ``"inputs"`` field of the request
            payload.
        timeout: Seconds to wait for the HTTP request before giving up.
            Pass ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the request exceeds *timeout*.
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    api_url = (
        "https://api-inference.huggingface.co/pipeline/feature-extraction/"
        f"{model_id}"
    )
    # Only send the Authorization header when a token is configured;
    # otherwise the header would literally read "Bearer None".
    headers = {"Authorization": f"Bearer {HF_token}"} if HF_token else {}
    payload = {"inputs": texts, "options": {"wait_for_model": True}}
    response = requests.post(
        api_url, headers=headers, json=payload, timeout=timeout
    )
    response.raise_for_status()  # Surface HTTP errors instead of returning an error body
    return response.json()
# Create Embeddings for Searching the Splits
# Directory handed to Chroma as its on-disk persistence location.
persist_directory = './chroma/'

# Create the open-source embedding function used when building the vector store.
embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
def initialize():
    """Build the Chroma vector store from the generated document splits.

    Obtains the splits from ``gen_splits.gen_splits()``, indexes them with the
    module-level ``embedding_function`` and stores the result under
    ``persist_directory``.

    Returns:
        The ``Chroma`` vector store created from the splits.
    """
    splits = gen_splits.gen_splits()
    vectordb = Chroma.from_documents(
        documents=splits,
        persist_directory=persist_directory,
        embedding=embedding_function,
    )
    # Explicitly flush the index to disk.
    # NOTE(review): newer Chroma releases persist automatically and deprecate
    # this call — confirm against the pinned chromadb/langchain versions.
    vectordb.persist()
    return vectordb
# if __name__ == "__main__":
#     vectordb = initialize()