import os

# NOTE(review): the other LangChain imports here come from `langchain_community`;
# newer LangChain releases also expose PyPDFLoader from
# `langchain_community.document_loaders` -- confirm against the installed version
# before switching this import path.
from langchain.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS


def create_faiss_index():
    """Build a FAISS vector index from the PDFs in ``docs`` and save it locally.

    Every file in the ``docs`` directory (relative to the current working
    directory) whose name ends in ``.pdf`` (case-insensitive) is loaded with
    ``PyPDFLoader``. The loaded documents are embedded with the
    ``sentence-transformers/all-MiniLM-L6-v2`` HuggingFace model, indexed via
    ``FAISS.from_documents`` and written to the ``faiss_index`` directory.

    Returns:
        None. Progress and failures are reported with ``print``.

    Raises:
        Nothing. Any exception raised while building the index (including the
        ``ValueError`` raised here for a missing/empty directory or when no PDF
        yields documents) is caught and printed.
    """
    try:
        # Ensure the 'docs' directory exists and contains files.
        # `isdir` (rather than `exists`) also rejects a regular file named 'docs'.
        docs_directory = 'docs'
        if not os.path.isdir(docs_directory) or not os.listdir(docs_directory):
            raise ValueError(f"Directory '{docs_directory}' is empty or does not exist.")

        # Load all PDF documents from the 'docs' directory.
        # `os.listdir` returns entries in arbitrary order; sorting keeps the
        # order of documents fed to the index the same from run to run.
        documents = []
        for file_name in sorted(os.listdir(docs_directory)):
            if file_name.lower().endswith('.pdf'):
                loader = PyPDFLoader(os.path.join(docs_directory, file_name))
                documents.extend(loader.load())

        if not documents:
            raise ValueError("No valid documents found in the 'docs' directory.")

        # Create embeddings using the 'sentence-transformers/all-MiniLM-L6-v2' model.
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

        # Create the FAISS vector store index.
        faiss_index = FAISS.from_documents(documents, embeddings)

        # Save the FAISS index locally.
        index_path = "faiss_index"
        os.makedirs(index_path, exist_ok=True)
        faiss_index.save_local(index_path)
        print("FAISS index created and saved successfully.")

    except Exception as e:
        # Script-level boundary: report the failure instead of propagating it.
        print(f"An error occurred during FAISS index creation: {e}")


if __name__ == "__main__":
    create_faiss_index()