Spaces:

Ibrahimarain
/

llama-2-13B

Runtime error

App Files Files Community

llama-2-13B / app.py

Ibrahimarain

updated path

da69772 9 months ago

raw

history blame contribute delete

5.48 kB

	import llama_cpp
	import llama_cpp.llama_tokenizer
	from llama_cpp import Llama

	import gradio as gr
	from loguru import logger
	import psutil
	from ctransformers import AutoModelForCausalLM,AutoTokenizer


	prompt_template = """[INST] <<SYS>>
	You are a helpful assistant for a crowdfunding platform called GiveSendGo. Your goal is to gather essential information for campaign and generate a title and sample pitch of atleast 1000 words for the campaign.
	<</SYS>>

	{question} [/INST]
	"""

	model_loc = "models/llama-2-13b-chat.Q5_K_M.gguf"
	model_loc = "TheBloke/Llama-2-13B-chat-GGUF"

	llama = AutoModelForCausalLM.from_pretrained(
	model_loc,
	model_type="llama",
	context_length=4096,
	max_new_tokens=2048,
	hf=True
	# threads=cpu_count,
	)


	# llama = llama_cpp.Llama.from_pretrained(
	# #repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
	# #filename="*q8_0.gguf",
	# mode_path=model_loc,
	# model_type="llama",
	# context_length=4096,
	# max_new_tokens=2048,
	# filename="llama-2-13b-chat.Q5_K_M.gguf",
	# tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
	# verbose=False
	# )

	# llama = Llama(
	# model_path=model_loc,
	# max_tokens=4096,
	# n_ctx=4096,
	# verbose=False,
	# )

	_ = [elm for elm in prompt_template.splitlines() if elm.strip()]
	stop_string = [elm.split(":")[0] + ":" for elm in _][-2]

	logger.debug(f"{stop_string=}")

	_ = psutil.cpu_count(logical=False) - 1
	cpu_count: int = int(_) if _ else 1
	logger.debug(f"{cpu_count=}")



	model = "gpt-3.5-turbo"

	def predict(message, history):
	messages = []
	prompt = prompt_template.format(question=message)

	for user_message, assistant_message in history:
	messages.append({"role": "system", "content": prompt},)
	messages.append({"role": "user", "content": user_message})
	messages.append({"role": "assistant", "content": assistant_message})

	messages.append({"role": "user", "content": message})

	response = llama.create_chat_completion_openai_v1(
	model=model,
	messages=messages,
	response_format={
	"type": "json_object",
	"schema": {
	"type": "object",
	"properties": {"title": {"type": "string"},
	#"description": {"type": "string"},
	"sample_pitch": {"type": "string"},
	"amount": {"type": "string"},
	"location": {"type": "string"}},
	"required": ["title","sample_pitch","amount","location"], #description
	},
	},
	temperature=0.7,
	stream=True
	)

	text = ""
	for chunk in response:
	content = chunk.choices[0].delta.content
	if content:
	text += content
	yield text


	def generate(message):

	try:
	messages = []
	prompt = prompt_template.format(question=message)

	#for user_message, assistant_message in history:
	messages.append({"role": "system", "content": prompt},)
	#messages.append({"role": "user", "content": user_message})
	#messages.append({"role": "assistant", "content": assistant_message})

	messages.append({"role": "user", "content": message})

	response = llama.create_chat_completion_openai_v1(
	model=model,
	messages=messages,
	response_format={
	"type": "json_object",
	"schema": {
	"type": "object",
	"properties": {"title": {"type": "string"},
	#"description": {"type": "string"},
	"sample_pitch": {"type": "string"},
	"amount": {"type": "string"},
	"location": {"type": "string"}},
	"required": ["title","sample_pitch","amount","location"], #description
	},
	},
	temperature=0.7,
	stream=False)

	# text = ""
	# for chunk in response:
	# content = chunk.choices[0].delta.content
	# if content:
	# text += content
	# logger.debug(f"api: {content=}")

	# yield text

	logger.debug(f"{response}")

	return response.choices[0].delta.content


	except Exception as exc:
	logger.error(exc)
	response = f"{exc=}"


	def predict_api(message):
	logger.debug(f"{message=}")
	text = generate(message)
	logger.debug(f"text::{text=}")

	return f"json: {text=}"



	js = """function () {
	gradioURL = window.location.href
	if (!gradioURL.endsWith('?__theme=dark')) {
	window.location.replace(gradioURL + '?__theme=dark');
	}
	}"""

	css = """
	footer {
	visibility: hidden;
	}
	full-height {
	height: 100%;
	}
	"""

	with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css) as demo:
	gr.ChatInterface(predict, examples=["What is the capital of France?", "Who was the first person on the moon?"])

	with gr.Accordion("For Chat/Translation API", open=False, visible=False):
	input_text = gr.Text()
	api_btn = gr.Button("Go", variant="primary")
	out_text = gr.Text()

	api_btn.click(
	predict_api,
	input_text,
	out_text,
	api_name="api",
	)


	if __name__ == "__main__":
	demo.queue().launch(debug=True, share=True)