import llama_cpp
import llama_cpp.llama_tokenizer
from llama_cpp import Llama
import gradio as gr
from loguru import logger
import psutil
# ctransformers is only needed for the alternative loader kept below.
from ctransformers import AutoModelForCausalLM, AutoTokenizer
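
# Llama-2 chat prompt format: system instructions go inside <<SYS>> ... <</SYS>>,
# and the user's question is wrapped in [INST] ... [/INST].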
prompt_template = """[INST] <<SYS>>
You are a helpful assistant for a crowdfunding platform called GiveSendGo. Your goal is to gather essential information for the campaign and generate a title and a sample pitch of at least 1000 words for the campaign.
<</SYS>>
{question} [/INST]
"""
# GGUF checkpoint: Hugging Face repo and quantized file name.
model_loc = "TheBloke/Llama-2-13B-chat-GGUF"
model_file = "llama-2-13b-chat.Q5_K_M.gguf"

# Load the model with llama-cpp-python; its Llama object provides the
# OpenAI-compatible create_chat_completion_openai_v1() used below.
# (The weights are downloaded from the Hub via huggingface_hub.)
llama = Llama.from_pretrained(
    repo_id=model_loc,
    filename=model_file,
    n_ctx=4096,
    verbose=False,
    # n_threads=cpu_count,
)

# Alternative loaders kept for reference:
# llama = AutoModelForCausalLM.from_pretrained(
#     model_loc,
#     model_type="llama",
#     context_length=4096,
#     max_new_tokens=2048,
#     hf=True,
#     # threads=cpu_count,
# )
# llama = Llama(
#     model_path="models/llama-2-13b-chat.Q5_K_M.gguf",
#     n_ctx=4096,
#     verbose=False,
# )
# Derive a stop string from the prompt template (currently unused downstream).
_ = [elm for elm in prompt_template.splitlines() if elm.strip()]
stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
logger.debug(f"{stop_string=}")
# Leave one physical core free for the server; fall back to 1 if the count is unknown.
_ = psutil.cpu_count(logical=False)
cpu_count: int = _ - 1 if _ and _ > 1 else 1
logger.debug(f"{cpu_count=}")

# Label echoed in the OpenAI-style response metadata; the local GGUF model is what actually runs.
model = "gpt-3.5-turbo"
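
# predict() streams replies for the chat UI; generate() and predict_api() back the
# hidden API endpoint. Both constrain the output to a JSON object containing the
# campaign fields (title, sample_pitch, amount, location).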
def predict(message, history):
    """Stream the assistant's reply for the Gradio ChatInterface."""
    system_prompt = prompt_template.format(question=message)
    messages = [{"role": "system", "content": system_prompt}]
    for user_message, assistant_message in history:
        messages.append({"role": "user", "content": user_message})
        messages.append({"role": "assistant", "content": assistant_message})
    messages.append({"role": "user", "content": message})

    response = llama.create_chat_completion_openai_v1(
        model=model,
        messages=messages,
        response_format={
            "type": "json_object",
            "schema": {
                "type": "object",
                "properties": {
                    "title": {"type": "string"},
                    # "description": {"type": "string"},
                    "sample_pitch": {"type": "string"},
                    "amount": {"type": "string"},
                    "location": {"type": "string"},
                },
                "required": ["title", "sample_pitch", "amount", "location"],  # description
            },
        },
        temperature=0.7,
        stream=True,
    )

    # Accumulate streamed deltas and yield the running text.
    text = ""
    for chunk in response:
        content = chunk.choices[0].delta.content
        if content:
            text += content
            yield text
def generate(message):
    """Return a single (non-streaming) JSON completion for the API endpoint."""
    try:
        system_prompt = prompt_template.format(question=message)
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": message},
        ]
        response = llama.create_chat_completion_openai_v1(
            model=model,
            messages=messages,
            response_format={
                "type": "json_object",
                "schema": {
                    "type": "object",
                    "properties": {
                        "title": {"type": "string"},
                        # "description": {"type": "string"},
                        "sample_pitch": {"type": "string"},
                        "amount": {"type": "string"},
                        "location": {"type": "string"},
                    },
                    "required": ["title", "sample_pitch", "amount", "location"],  # description
                },
            },
            temperature=0.7,
            stream=False,
        )
        logger.debug(f"{response}")
        # Non-streaming responses carry the text in message.content, not delta.
        return response.choices[0].message.content
    except Exception as exc:
        logger.error(exc)
        return f"{exc=}"
def predict_api(message):
    """Plain-text wrapper around generate(), exposed via api_name="api"."""
    logger.debug(f"{message=}")
    text = generate(message)
    logger.debug(f"text::{text=}")
    return f"json: {text=}"
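
# Small JS snippet run on load: reload the page with Gradio's dark-theme query
# parameter unless it is already set.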
js = """function () {
    const gradioURL = window.location.href;
    if (!gradioURL.endsWith('?__theme=dark')) {
        window.location.replace(gradioURL + '?__theme=dark');
    }
}"""
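
# Custom CSS for the page: hides the default Gradio footer.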
css = """
footer {
    visibility: hidden;
}
/* helper class for full-height panels (currently unused) */
.full-height {
    height: 100%;
}
"""
with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css) as demo:
    gr.ChatInterface(
        predict,
        examples=["What is the capital of France?", "Who was the first person on the moon?"],
    )
    with gr.Accordion("For Chat/Translation API", open=False, visible=False):
        input_text = gr.Text()
        api_btn = gr.Button("Go", variant="primary")
        out_text = gr.Text()
    api_btn.click(
        predict_api,
        input_text,
        out_text,
        api_name="api",
    )
if __name__ == "__main__":
    demo.queue().launch(debug=True, share=True)
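
# Usage sketch for the hidden API endpoint (assumes gradio_client is installed and
# the app is reachable at the default local address; adjust the URL for a Space):
#
#     from gradio_client import Client
#     client = Client("http://127.0.0.1:7860/")
#     result = client.predict("Raise funds for a medical emergency", api_name="/api")
#     print(result)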