# app.py: Llama-2-13B chat demo (Gradio) for GiveSendGo campaign pitches
import llama_cpp
import gradio as gr
from loguru import logger
import psutil
prompt_template = """[INST] <<SYS>>
You are a helpful assistant for a crowdfunding platform called GiveSendGo. Your goal is to gather essential information for campaign and generate a title and sample pitch of atleast 1000 words for the campaign.
<</SYS>>
{question} [/INST]
"""
model_loc = "models/llama-2-13b-chat.Q5_K_M.gguf"
model_loc = "TheBloke/Llama-2-13B-chat-GGUF"
llama = AutoModelForCausalLM.from_pretrained(
model_loc,
model_type="llama",
context_length=4096,
max_new_tokens=2048,
hf=True
# threads=cpu_count,
)
# llama = llama_cpp.Llama.from_pretrained(
# #repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
# #filename="*q8_0.gguf",
# mode_path=model_loc,
# model_type="llama",
# context_length=4096,
# max_new_tokens=2048,
# filename="llama-2-13b-chat.Q5_K_M.gguf",
# tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
# verbose=False
# )
# llama = Llama(
# model_path=model_loc,
# max_tokens=4096,
# n_ctx=4096,
# verbose=False,
# )
model = "gpt-3.5-turbo"
def predict(message, history):
    """Stream a structured campaign draft into the Gradio ChatInterface."""
    # Send the system prompt once, then replay the conversation history.
    messages = [{"role": "system", "content": system_prompt}]
    for user_message, assistant_message in history:
        messages.append({"role": "user", "content": user_message})
        messages.append({"role": "assistant", "content": assistant_message})
    messages.append({"role": "user", "content": message})
    # Constrain the output to a JSON object with the campaign fields;
    # llama-cpp-python enforces the schema with grammar-based sampling.
    response = llama.create_chat_completion_openai_v1(
        model=model,
        messages=messages,
        response_format={
            "type": "json_object",
            "schema": {
                "type": "object",
                "properties": {
                    "title": {"type": "string"},
                    # "description": {"type": "string"},
                    "sample_pitch": {"type": "string"},
                    "amount": {"type": "string"},
                    "location": {"type": "string"},
                },
                "required": ["title", "sample_pitch", "amount", "location"],
            },
        },
        temperature=0.7,
        stream=True,
    )
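    # Accumulate streamed tokens and re-yield the growing string so the chat
    # window updates in place.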
text = ""
for chunk in response:
content = chunk.choices[0].delta.content
if content:
text += content
yield text
def generate(message):
    """Return the full JSON campaign draft for a single message (non-streaming)."""
    try:
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": message},
        ]
        response = llama.create_chat_completion_openai_v1(
            model=model,
            messages=messages,
            response_format={
                "type": "json_object",
                "schema": {
                    "type": "object",
                    "properties": {
                        "title": {"type": "string"},
                        # "description": {"type": "string"},
                        "sample_pitch": {"type": "string"},
                        "amount": {"type": "string"},
                        "location": {"type": "string"},
                    },
                    "required": ["title", "sample_pitch", "amount", "location"],
                },
            },
            temperature=0.7,
            stream=False,
        )
        logger.debug(f"{response}")
        # Non-streaming responses carry the text in message.content, not in
        # the streaming delta field.
        return response.choices[0].message.content
    except Exception as exc:
        logger.error(exc)
        return f"{exc=}"
def predict_api(message):
logger.debug(f"{message=}")
text = generate(message)
logger.debug(f"text::{text=}")
return f"json: {text=}"
js = """function () {
gradioURL = window.location.href
if (!gradioURL.endsWith('?__theme=dark')) {
window.location.replace(gradioURL + '?__theme=dark');
}
}"""
css = """
footer {
visibility: hidden;
}
full-height {
height: 100%;
}
"""
with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css) as demo:
gr.ChatInterface(predict, examples=["What is the capital of France?", "Who was the first person on the moon?"])
with gr.Accordion("For Chat/Translation API", open=False, visible=False):
input_text = gr.Text()
api_btn = gr.Button("Go", variant="primary")
out_text = gr.Text()
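        # Expose the hidden button as a named endpoint ("/api") so the
        # handler above is callable programmatically.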
api_btn.click(
predict_api,
input_text,
out_text,
api_name="api",
)
if __name__ == "__main__":
demo.queue().launch(debug=True, share=True)