import argparse
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
# The OpenAI embedding/chat classes from langchain_openai were replaced by the
# local SentenceTransformer embeddings and Falcon pipeline defined below.

# A template by which the bot answers the question according to the "context"
# (the retrieved text inserted later); it constrains the information the answer
# may draw on. Note: the PromptTemplate defined further down is the one
# actually used at query time.
PROMPT_TEMPLATE = """
Answer the question based only on the following context:
{context}
---
Answer the question based on the above context: {question}
"""
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
import openai
from dotenv import load_dotenv
import os
import shutil

# A custom embedding wrapper built on sentence-transformers.
from sentence_transformers import SentenceTransformer
from langchain_experimental.text_splitter import SemanticChunker
from typing import List
class MyEmbeddings:
    """Minimal embedding wrapper exposing the embed_documents / embed_query
    interface that LangChain's Chroma wrapper expects."""

    def __init__(self):
        self.model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return [self.model.encode(t).tolist() for t in texts]

    def embed_query(self, query: str) -> List[float]:
        return self.model.encode(query).tolist()

embeddings = MyEmbeddings()
splitter = SemanticChunker(embeddings)
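# Optional sanity check of the embedding wrapper (not in the original script;
# "hello world" is an arbitrary example string). all-MiniLM-L6-v2 produces
# 384-dimensional vectors, so the printed length should be 384.
# vec = embeddings.embed_query("hello world")
# print(len(vec))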
CHROMA_PATH = "chroma8"
# Load the Chroma index previously persisted to this directory.
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
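# The script only loads "chroma8"; building the index is not shown here. A
# minimal, hypothetical ingestion sketch using the imports above (the "data"
# directory and the "*.md" glob are assumptions, not taken from the original):
# def build_index(data_path: str = "data"):
#     if os.path.exists(CHROMA_PATH):
#         shutil.rmtree(CHROMA_PATH)           # start from a clean index
#     docs = DirectoryLoader(data_path, glob="*.md").load()
#     chunks = splitter.split_documents(docs)  # SemanticChunker defined above
#     Chroma.from_documents(chunks, embeddings, persist_directory=CHROMA_PATH)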
from huggingface_hub import login

# Retrieve the Hugging Face access token from the environment and log in.
token = os.getenv('HF_Token')
login(token=token)
from transformers import AutoTokenizer, pipeline
import transformers
import torch

model = "tiiuae/falcon-7b-instruct"  # alternative: meta-llama/Llama-2-7b-chat-hf
tokenizer = AutoTokenizer.from_pretrained(model, use_auth_token=True)

# Text-generation pipeline; float16 weights with device_map="auto" place the
# model across the available GPU(s).
llama_pipeline = pipeline(
    "text-generation",  # LLM task
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)
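# Optional sanity check (not in the original script). Passing
# return_full_text=False makes the text-generation pipeline return only the
# completion, which would remove the need to slice the prompt off manually in
# get_llama_response below.
# out = llama_pipeline("Hello, how are you?", max_new_tokens=20,
#                      return_full_text=False)
# print(out[0]["generated_text"])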
def get_response(prompt: str) -> None:
    """
    Generate a response from the loaded model.

    Parameters:
        prompt (str): The user's input/question for the model.

    Returns:
        None: Prints the model's response.
    """
    sequences = llama_pipeline(
        prompt,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=256,
    )
    print("Chatbot:", sequences[0]['generated_text'])
template = """Answer the query based only on the provided context. If the answer is not contained within the context below, say "I don't know".

Context:
{context}

{query}""".strip()

from langchain.prompts import PromptTemplate

prompt_template = PromptTemplate(
    input_variables=["query", "context"],
    template=template
)
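# How the prompt renders (illustrative values, not from the original script):
# print(prompt_template.format(
#     context="Alice follows the White Rabbit down the hole.",
#     query="Who does Alice follow?",
# ))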
# Generate a retrieval-augmented response from the model.
def get_llama_response(message: str, history: list) -> str:
    """
    Generates a conversational response grounded in the Chroma index.

    Parameters:
        message (str): User's input message.
        history (list): Past conversation history (unused here, but required
            by gr.ChatInterface).

    Returns:
        str: Generated response from the model.
    """
    print('message:', message)

    # Search the DB for the documents most similar to the query.
    print('before searching the db')
    results = db.similarity_search_with_relevance_scores(message, k=3)
    if len(results) == 0 or results[0][1] < 0.5:
        print("Unable to find matching results.")
        return "I don't know"
    print('after searching the db')

    # Join the retrieved chunks into a single context block.
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])

    # Fill the prompt template with the retrieved context and the question.
    query = prompt_template.format(query=message, context=context_text)

    print('generating response')
    sequences = llama_pipeline(
        query,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=1024,
    )
    print('generated response')

    generated_text = sequences[0]['generated_text']
    response = generated_text[len(query):]  # remove the prompt from the output
    print("Chatbot:", response.strip())
    return response.strip()
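# Example call (commented out; not in the original script). gr.ChatInterface
# passes the running chat history as the second argument; an empty list stands
# in for it here.
# print(get_llama_response("How does Alice meet the Mad Hatter?", []))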
import gradio as gr

# Expose the RAG chatbot as a Gradio chat UI.
gr.ChatInterface(get_llama_response).launch()