import os
from typing import List

from dotenv import load_dotenv
from langchain_community.vectorstores import Chroma

# Template by which the bot answers a question according to the "context"
# retrieved later; it restricts the answer to that information only.
PROMPT_TEMPLATE = """
Answer the question based only on the following context:
{context}
---
Answer the question based on the above context: {question}
"""

# Custom embeddings backed by sentence-transformers.
from sentence_transformers import SentenceTransformer
from langchain_experimental.text_splitter import SemanticChunker

class MyEmbeddings:
    """Minimal adapter exposing the embed_documents / embed_query interface
    that Chroma expects, backed by sentence-transformers."""

    def __init__(self):
        self.model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return [self.model.encode(t).tolist() for t in texts]

    def embed_query(self, query: str) -> List[float]:
        return self.model.encode(query).tolist()

embeddings = MyEmbeddings()
# SemanticChunker is only needed when building the index; it is unused at
# query time.
splitter = SemanticChunker(embeddings)
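
# Quick sanity check of the adapter (illustrative only; all-MiniLM-L6-v2
# embeds into 384 dimensions):
#   vec = embeddings.embed_query("hello world")
#   assert len(vec) == 384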

CHROMA_PATH = "chroma8"
# Load the Chroma index persisted in this directory.
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
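
# The index is assumed to have been built offline by a separate ingestion
# step, roughly (sketch, assuming `chunks` is a list of langchain Documents):
#   db = Chroma.from_documents(chunks, embeddings, persist_directory=CHROMA_PATH)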

from huggingface_hub import login

# Load .env (if present) and retrieve the Hugging Face token from the
# HF_Token environment variable (e.g. a Space secret), then log in.
load_dotenv()
token = os.getenv('HF_Token')
login(token=token)

from transformers import AutoTokenizer, pipeline
import torch

model = "tiiuae/falcon-7b-instruct"  # alternative: meta-llama/Llama-2-7b-chat-hf
tokenizer = AutoTokenizer.from_pretrained(model, token=token)

# Text-generation pipeline: fp16 weights, devices assigned automatically.
llama_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

def get_response(prompt: str) -> None:
    """
    Generate a response from the model and print it.

    Parameters:
        prompt (str): The user's input/question for the model.

    Returns:
        None: Prints the model's response.
    """
    sequences = llama_pipeline(
        prompt,
        do_sample=True,
        top_k=10,  # sample from the 10 most likely next tokens
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=256,
    )
    print("Chatbot:", sequences[0]['generated_text'])
template = """Answer the query based only the provided context, and if the answer is not contained within the context below, say "I don't knowwwww"
Context:
{context}
{query}""".strip()
from langchain.prompts import PromptTemplate
prompt_template = PromptTemplate(
    input_variables=["query", "context"],
    template=template,
)
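
# e.g. prompt_template.format(query="Who is Alice?", context="Alice is ...")
# returns the template above with both placeholders filled in.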

# Generate a retrieval-augmented response from the model.
def get_llama_response(message: str, history: list) -> str:
    """
    Generates a conversational response grounded in documents retrieved
    from the Chroma index.

    Parameters:
        message (str): User's input message.
        history (list): Past conversation history (unused; required by Gradio).

    Returns:
        str: Generated response from the model.
    """
    # Search the DB for the chunks most relevant to the query.
    results = db.similarity_search_with_relevance_scores(message, k=3)
    if len(results) == 0 or results[0][1] < 0.5:
        print("Unable to find matching results.")
        return "I don't know."
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])

    # Fill the prompt with the retrieved context and the user's question.
    query = prompt_template.format(query=message, context=context_text)

    sequences = llama_pipeline(
        query,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=1024,
    )
    generated_text = sequences[0]['generated_text']
    response = generated_text[len(query):]  # strip the echoed prompt
    print("Chatbot:", response.strip())
    return response.strip()
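
# Example call (Gradio passes an empty history on the first turn):
#   get_llama_response("how does alice meet the mad hatter?", [])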

import gradio as gr

# Launch a simple chat UI backed by the retrieval pipeline.
gr.ChatInterface(get_llama_response).launch()
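
# On a Hugging Face Space the app is served automatically; when running
# locally, launch(share=True) exposes a temporary public link (a standard
# Gradio option).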