import argparse
# from dataclasses import dataclass
from langchain_community.vectorstores import Chroma
# from langchain_openai import OpenAIEmbeddings
# from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# Template the bot uses to answer the question strictly from the "context"
# (the retrieved text injected later); it constrains the answer to the
# supplied information only.
PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {question}
"""

# from langchain.document_loaders import DirectoryLoader
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
# from langchain.embeddings import OpenAIEmbeddings
# from langchain_openai import OpenAIEmbeddings
import openai
from dotenv import load_dotenv
import os
import shutil

# A custom embedding wrapper around a SentenceTransformer model, exposing the
# embed_documents / embed_query interface that LangChain vector stores expect.
from sentence_transformers import SentenceTransformer
from langchain_experimental.text_splitter import SemanticChunker
from typing import List


class MyEmbeddings:
    def __init__(self):
        self.model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return [self.model.encode(t).tolist() for t in texts]

    def embed_query(self, query: str) -> List[float]:
        return self.model.encode(query).tolist()


embeddings = MyEmbeddings()
splitter = SemanticChunker(embeddings)

CHROMA_PATH = "chroma8"

# Open the Chroma vector store previously persisted in this directory.
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
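
# NOTE: the script assumes the "chroma8" directory already holds a persisted
# index. Below is a minimal, optional sketch of how such an index could be
# built with the DirectoryLoader and SemanticChunker imported above; the
# helper name, data path, and glob pattern are assumptions for illustration
# and the function is never called by this script.
def build_chroma_index(data_path: str = "data", glob: str = "*.md") -> None:
    """Hypothetical helper: rebuild the persisted Chroma store from a directory of text files."""
    documents = DirectoryLoader(data_path, glob=glob).load()
    chunks = splitter.split_documents(documents)  # semantic chunking with the same embeddings
    if os.path.exists(CHROMA_PATH):
        shutil.rmtree(CHROMA_PATH)  # start from a clean store
    Chroma.from_documents(chunks, embedding=embeddings, persist_directory=CHROMA_PATH)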
from huggingface_hub import login

load_dotenv()  # load environment variables (e.g. HF_Token) from a .env file, if present

# Retrieve the Hugging Face token from the environment and log in.
token = os.getenv('HF_Token')
login(token=token)

from transformers import AutoTokenizer
import transformers
import torch

model = "tiiuae/falcon-7b-instruct"  # alternative: meta-llama/Llama-2-7b-chat-hf
tokenizer = AutoTokenizer.from_pretrained(model, use_auth_token=True)

from transformers import pipeline

llama_pipeline = pipeline(
    "text-generation",  # LLM task
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)


def get_response(prompt: str) -> None:
    """
    Generate a response from the model.

    Parameters:
        prompt (str): The user's input/question for the model.

    Returns:
        None: Prints the model's response.
    """
    sequences = llama_pipeline(
        prompt,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=256,
    )
    print("Chatbot:", sequences[0]['generated_text'])


template = """Answer the query based only on the provided context, and if the answer is not contained within the context below, say "I don't know".

Context: {context}

{query}""".strip()

from langchain.prompts import PromptTemplate

prompt_template = PromptTemplate(
    input_variables=["query", "context"],
    template=template,
)


# Generate a retrieval-augmented response from the model.
def get_llama_response(message: str, history: list) -> str:
    """
    Generates a conversational response from the model.

    Parameters:
        message (str): User's input message.
        history (list): Past conversation history.

    Returns:
        str: Generated response from the model.
    """
    print('message:', message)
    # query = format_message(message, history)

    # Search the DB for the documents most similar to the query.
    results = db.similarity_search_with_relevance_scores(message, k=3)
    if len(results) == 0 or results[0][1] < 0.5:
        print("Unable to find matching results.")
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])

    # Fill the prompt template with the retrieved context and the user's question.
    query = prompt_template.format(query=message, context=context_text)

    sequences = llama_pipeline(
        query,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=1024,
    )
    generated_text = sequences[0]['generated_text']
    response = generated_text[len(query):]  # Remove the prompt from the output
    print("Chatbot:", response.strip())
    return response.strip()


import gradio as gr

gr.ChatInterface(get_llama_response).launch()
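
# For a quick sanity check without the Gradio UI, the handler can also be
# called directly (launch() above blocks, so comment it out first), e.g.:
# print(get_llama_response('how does alice meet the mad hatter?', []))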