"""Gradio chat app: a hotel-booking assistant backed by a Hugging Face endpoint.

The model is reached through LangChain's ``HuggingFaceEndpoint``; each request
builds an ``LLMChain`` whose windowed memory is seeded from the chat history
that Gradio passes to the callback.
"""

import os

import gradio as gr
import spaces
from langchain import HuggingFaceHub
from langchain.chains import LLMChain, ConversationChain
from langchain.llms.base import LLM
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_core.prompts.chat import (
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

your_endpoint_url = "https://kp4xdy196cw81uf3.us-east-1.aws.endpoints.huggingface.cloud"
# Raises KeyError at start-up when API_TOKEN is not set in the environment.
token = os.environ["API_TOKEN"]

llm = HuggingFaceEndpoint(
    endpoint_url=your_endpoint_url,
    huggingfacehub_api_token=token,
    task="text-generation",
    max_new_tokens=128,
    top_k=10,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.01,
    repetition_penalty=1.03,
)

# System prompt. The wording is runtime behaviour and is kept verbatim; the
# trailing backslashes are line continuations inside the literal, so the text
# wraps in source without inserting extra newlines into the prompt.
_SYSTEM_TEMPLATE = """ Do not repeat questions and do not generate answer for user/human. \
You are a helpful hotel booking asssitant. \
Below is an instruction that describes a task. \
Write a response that appropriately completes the request. \
Reply with the most helpful and logic answer. \
During the conversation you need to ask the user the following questions to complete the hotel booking task. \
1) Where would you like to stay and when? \
2) How many people are staying in the room? \
3) Do you prefer any ammenities like breakfast included or gym? \
4) What is your name, your email address and phone number? \
When the booking task is completed, respond with "Thank you for choosing us.". \n\
{history} """


def chat_template_prompt():
    """Build the chat prompt: system instructions plus ``{history}``, then ``{input}``.

    Returns:
        A ``ChatPromptTemplate`` with the input variables ``history`` and
        ``input``.
    """
    system_prompt = SystemMessagePromptTemplate.from_template(_SYSTEM_TEMPLATE)
    human_prompt = HumanMessagePromptTemplate.from_template("{input}")
    return ChatPromptTemplate.from_messages([system_prompt, human_prompt])


def chain():
    """Create an ``LLMChain`` over the module-level ``llm`` with empty memory.

    The memory is a ``ConversationBufferWindowMemory`` with ``k=3``. It starts
    empty, so callers that want conversational context must seed it.

    Returns:
        The newly constructed ``LLMChain``.
    """
    memory = ConversationBufferWindowMemory(k=3)
    return LLMChain(llm=llm, memory=memory, prompt=chat_template_prompt())


def _history_to_turns(history):
    """Normalise chat history into a list of ``(user_text, bot_text)`` pairs.

    Two layouts are accepted: a sequence of two-item ``[user, bot]`` entries,
    and a sequence of ``{"role": ..., "content": ...}`` dicts. Entries whose
    text is not a plain string, and user messages without a following
    assistant reply, are skipped.

    Args:
        history: The history value handed to the chat callback; may be
            ``None`` or empty.

    Returns:
        A list of ``(user_text, bot_text)`` tuples in conversation order.
    """
    turns = []
    pending_user = None
    for entry in history or []:
        if isinstance(entry, dict):
            content = entry.get("content")
            if not isinstance(content, str):
                continue
            role = entry.get("role")
            if role == "user":
                pending_user = content
            elif role == "assistant" and pending_user is not None:
                turns.append((pending_user, content))
                pending_user = None
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            user_text, bot_text = entry
            if isinstance(user_text, str) and isinstance(bot_text, str):
                turns.append((user_text, bot_text))
    return turns


@spaces.GPU
def chat_output(message, history):
    """Answer one user message, using the earlier turns as context.

    A new chain is built for every call, so its memory is seeded from
    ``history`` first; otherwise ``{history}`` in the prompt would always be
    empty and the assistant could not follow the booking dialogue.

    Args:
        message: The user's latest message.
        history: Earlier turns of this chat, as supplied by the UI.

    Returns:
        The text produced by the chain for ``message``.
    """
    llm_chain = chain()
    chat_memory = llm_chain.memory.chat_memory
    for user_text, bot_text in _history_to_turns(history):
        chat_memory.add_user_message(user_text)
        chat_memory.add_ai_message(bot_text)
    return llm_chain.predict(input=message)


with gr.Blocks() as demo:
    # render=False defers rendering to ChatInterface; without it these two
    # widgets would appear on the page unconnected to the chat.
    chatbot_component = gr.Chatbot(height=300, label="history", render=False)
    textbox_component = gr.Textbox(
        placeholder="Can I help you to book a hotel?",
        container=False,
        label="input",
        scale=7,
        render=False,
    )

    demo.chatbot_interface = gr.ChatInterface(
        fn=chat_output,
        chatbot=chatbot_component,
        textbox=textbox_component,
        examples=[
            "Hello I would like to book a hotel room.",
            "Hello I want to stay in Nuremberg in 30th of May.",
        ],
        title="Hotel Booking Assistant Chat 🤗",
        description="I am your hotel booking assistant. Feel free to start chatting with me.",
    )

if __name__ == "__main__":
    demo.launch()