Spaces:
Sleeping
Sleeping
File size: 1,667 Bytes
058640f b884c59 058640f b884c59 058640f b884c59 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
from gradio_client import Client
# Instructions given to the model: answer strictly from the supplied context.
# Written one source line per prompt line; each "\n" is part of the prompt text.
system_prompt = (
    "You are a helpful assistant, you will use the provided context only to answer user questions.\n"
    "Read the given context before answering questions and think step by step. you could get context or question in other language than english.\n"
    "Answer only if the question related to the contexts, don't use your own data ..if the question isn't related to the context ,respond with \"sorry..no provided context for this question\".\n"
    "If you can not answer a user question based on provided context only , inform the user.\n"
    "Do not use any other information for answering user. Provide a detailed answer to the question."
)

# Delimiters wrapped around the instruction and the system section.
B_INST = "[INST]"
E_INST = "[/INST]"
B_SYS = "<<SYS>>\n"
E_SYS = "\n<</SYS>>\n\n"

# System prompt enclosed in its <<SYS>> ... <</SYS>> markers.
SYSTEM_PROMPT = "".join((B_SYS, system_prompt, E_SYS))

# Per-request part of the template; `{context}` and `{question}` are left as
# named placeholders here and are not filled in by this block.
instruction = "\nContext: {context}\nUser: {question}"

# Full template: [INST] + system section + instruction + [/INST].
prompt_template = "".join((B_INST, SYSTEM_PROMPT, instruction, E_INST))
def connect_to_llama(
    query,
    context,
    *,
    space_url="https://huggingface-projects-llama-2-13b-chat.hf.space/--replicas/5c42d8wx6/",
):
    """Send a question plus its context to the hosted Llama-2 chat Space.

    A new ``Client`` is created for ``space_url`` on every call and its
    ``/chat`` endpoint is invoked once.

    Args:
        query: The user's question; interpolated into the message with
            ``str.format``.
        context: The text the answer should be based on; interpolated into
            the message the same way.
        space_url: Gradio endpoint to connect to. The default is the
            replica-pinned URL this function has always used; pass another
            URL to target a different deployment without editing the code.
            NOTE(review): the default contains a ``--replicas/<id>`` segment
            and presumably stops resolving if that replica goes away --
            confirm.

    Returns:
        Whatever ``Client.predict`` returns for the ``/chat`` endpoint.
    """
    client = Client(space_url)

    # Same text the original triple-quoted literal produced, including the
    # leading and trailing newline.
    message = '\nquestion:"{}"\ncontext:"{}"\nanswer:\n'.format(query, context)

    # NOTE(review): `prompt_template` (module level) still carries the literal
    # `{context}` / `{question}` placeholders and the [INST] / <<SYS>> markers;
    # it is passed through unformatted, exactly as before. Confirm that this
    # is what the endpoint's system-prompt field should receive.
    return client.predict(
        message,  # str in 'parameter_7' Textbox component
        prompt_template,  # str in 'Optional system prompt' Textbox component
        4096,  # 'Max new tokens' slider (numeric value between 0 and 4096)
        0.1,  # unlabelled in the original; presumably temperature -- TODO confirm
        0.05,  # unlabelled in the original; presumably top-p -- TODO confirm
        # The original comment labelled the next value 'Top-p (nucleus
        # sampling)' (0.0 to 1). NOTE(review): it is the fourth numeric
        # argument, so verify the label against the Space's API page.
        1,
        1,  # 'Repetition penalty' slider (numeric value between 1.0 and 2.0)
        api_name="/chat",
    )