# FirstSpace / app.py
# AminFaraji's picture
# Update app.py
# ad9890e verified
# raw
# history blame
# 4.83 kB
import argparse
# from dataclasses import dataclass
from langchain_community.vectorstores import Chroma
#from langchain_openai import OpenAIEmbeddings
#from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
# Prompt template telling the model to answer a question using only the text
# supplied as "context"; the {context} and {question} placeholders are filled
# in when the template is formatted.
# NOTE(review): PROMPT_TEMPLATE is not referenced anywhere else in the visible
# part of this file.
PROMPT_TEMPLATE = """
Answer the question based only on the following context:
{context}
---
Answer the question based on the above context: {question}
"""
# from langchain.document_loaders import DirectoryLoader
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
# from langchain.embeddings import OpenAIEmbeddings
#from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
import openai
from dotenv import load_dotenv
import os
import shutil
# a custom embedding
from sentence_transformers import SentenceTransformer
from langchain_experimental.text_splitter import SemanticChunker
from typing import List
class MyEmbeddings:
    """Adapter exposing a SentenceTransformer via ``embed_documents`` / ``embed_query``.

    Instances are handed to ``SemanticChunker`` and ``Chroma`` in this file,
    both of which receive it as their embedding object.

    Args:
        model_name: Name or path passed to ``SentenceTransformer``. Defaults to
            the model this file has always used.
    """

    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding (a list of floats) per entry of *texts*."""
        if not texts:
            return []
        # A single batched call instead of one model invocation per text.
        return self.model.encode(list(texts)).tolist()

    def embed_query(self, query: str) -> List[float]:
        """Return the embedding of a single query string."""
        # Encoding a bare string yields a 1-D vector, so no batch wrapping or
        # indexing is needed.
        return self.model.encode(query).tolist()
# Directory handed to Chroma as its persistence location.
CHROMA_PATH = "chroma8"

# One embedding adapter shared by the semantic splitter and the vector store.
embeddings = MyEmbeddings()
splitter = SemanticChunker(embeddings)

# Chroma vector store backed by the CHROMA_PATH directory.
db = Chroma(embedding_function=embeddings, persist_directory=CHROMA_PATH)
# NOTE: the notebook shell magic that ran `huggingface-cli login` used to sit
# here. It is IPython-only syntax and a SyntaxError in a plain .py file, so it
# was removed. An access token, if one is needed, is read from the HF_TOKEN
# environment variable instead; when it is unset, None is passed and the
# library's default credential lookup applies.
from transformers import AutoTokenizer
import transformers
import torch

model = "tiiuae/falcon-7b-instruct"  # meta-llama/Llama-2-7b-chat-hf
_hf_token = os.environ.get("HF_TOKEN")

# `token=` replaces the deprecated `use_auth_token=` argument.
tokenizer = AutoTokenizer.from_pretrained(model, token=_hf_token)

from transformers import pipeline

llama_pipeline = pipeline(
    "text-generation",  # LLM task
    model=model,
    tokenizer=tokenizer,  # reuse the tokenizer loaded above instead of loading it again
    torch_dtype=torch.float16,
    device_map="auto",
    token=_hf_token,
)
def get_response(prompt: str) -> None:
    """Run *prompt* through the text-generation pipeline and print the reply.

    Args:
        prompt: Text handed to the pipeline unchanged.

    Returns:
        None. The first generated sequence is written to stdout with a
        "Chatbot:" prefix.
    """
    generation_options = dict(
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=256,
    )
    outputs = llama_pipeline(prompt, **generation_options)
    first_sequence = outputs[0]
    print("Chatbot:", first_sequence['generated_text'])
# Prompt text with two placeholders: {context} receives the retrieved passages
# and {query} receives the user's message.
template = """Answer the query based only the provided context, and if the answer is not contained within the context below, say "I don't knowwwww"
Context:
{context}
{query}""".strip()

from langchain.prompts import PromptTemplate

# Template object whose .format(query=..., context=...) builds the final prompt.
prompt_template = PromptTemplate(
    template=template,
    input_variables=["query", "context"],
)
# Generate a retrieval-augmented response from the text-generation pipeline
def get_llama_response(message: str, history: list) -> str:
    """Answer *message* using passages retrieved from the Chroma store.

    The three most relevant documents for *message* are looked up in ``db``,
    joined into one context string, inserted into ``prompt_template`` together
    with the message, and sent to ``llama_pipeline``.

    Args:
        message: The user's input message.
        history: Past conversation history. It is accepted so the function can
            be used as a chat callback, but it is not read.

    Returns:
        The generated completion with surrounding whitespace stripped.
    """
    print('messageeeeeeeeeeeeeee:',message)

    # Search the DB for similar documents to the query.
    results = db.similarity_search_with_relevance_scores(message, k=3)
    if not results or results[0][1] < 0.5:
        # NOTE(review): diagnostic only - generation still proceeds with
        # whatever was retrieved (possibly nothing). Confirm this is intended.
        print("Unable to find matching results.")
    context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)

    query = prompt_template.format(query=message, context=context_text)
    sequences = llama_pipeline(
        query,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        # max_length bounds prompt tokens plus generated tokens together.
        max_length=1024,
        # Ask the pipeline for the completion only, so the prompt does not
        # have to be sliced off the output by hand.
        return_full_text=False,
    )
    response = sequences[0]['generated_text'].strip()
    print("Chatbot:", response)
    return response
import gradio as gr

# Wrap the responder in a chat UI and start serving it.
chat_ui = gr.ChatInterface(fn=get_llama_response)
chat_ui.launch()