# Engg-SS_ChatBOT / embed.py
# abhivsh's picture
# Update embed.py
# 692c6b0 verified
# raw
# history blame
# 1.35 kB
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
import os
import gen_splits
from langchain_ollama import OllamaEmbeddings
import ollama
import chromadb
import requests # Added import for requests
# Hugging Face API token, read from the environment once at import time.
# It is None when the 'HF_token' variable is not set.
HF_token = os.getenv('HF_token')
# Embedding Function
def embed(texts):
    """Request embeddings for *texts* from the Hugging Face Inference API.

    Posts *texts* to the feature-extraction pipeline of the
    ``sentence-transformers/all-MiniLM-L6-v2`` model, authenticating with
    the module-level ``HF_token``.

    Args:
        texts: The payload sent as the ``"inputs"`` field of the request
            (presumably a string or a list of strings -- confirm against
            callers).

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    # The endpoint must be an absolute URL; the model id is its last path
    # segment.
    api_url = (
        "https://api-inference.huggingface.co/pipeline/feature-extraction/"
        f"{model_id}"
    )
    headers = {"Authorization": f"Bearer {HF_token}"}
    # "wait_for_model" asks the API to wait until the model is ready
    # instead of failing while it is still loading.
    payload = {"inputs": texts, "options": {"wait_for_model": True}}
    response = requests.post(api_url, headers=headers, json=payload)
    # Surface HTTP errors to the caller rather than returning an error body.
    response.raise_for_status()
    return response.json()
# Settings used when building the vector store over the document splits.

# Directory handed to Chroma as its persistence location.
persist_directory = './chroma/'

# Open-source embedding function backed by the "all-MiniLM-L6-v2" model.
embedding_function = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)
def initialize():
    """Build a Chroma vector store from the generated splits and return it.

    The documents come from ``gen_splits.gen_splits()``. They are embedded
    with the module-level ``embedding_function`` and the store is created
    with ``persist_directory`` as its persistence location, after which
    ``persist()`` is called on it.

    Returns:
        The ``Chroma`` vector store created from the splits.
    """
    documents = gen_splits.gen_splits()
    store = Chroma.from_documents(
        documents=documents,
        embedding=embedding_function,
        persist_directory=persist_directory,
    )
    store.persist()
    return store
# if __name__ == "__main__":
# vectordb = initialize()