Elster

Runtime error

App Files Files Community

Elster / app.py

davanstrien HF staff

use Markdown for generated model response

0d2a813 7 months ago

raw

history blame

2.84 kB

	import gradio as gr
	import transformers
	import torch
	import json
	from transformers import AutoTokenizer
	import os
	from huggingface_hub import login
	import spaces

	HF_TOKEN = os.getenv("HF_TOKEN")
	login(HF_TOKEN)
	# Load the model
	model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
	tokenizer = AutoTokenizer.from_pretrained(model_id, add_special_tokens=True)

	pipeline = transformers.pipeline(
	"text-generation",
	model=model_id,
	model_kwargs={"torch_dtype": torch.bfloat16},
	device="cuda",
	)

	# Load the model configuration
	with open("model_configs.json", "r") as f:
	model_configs = json.load(f)
	model_config = model_configs[model_id]

	# Extract instruction
	extract_input = model_config["extract_input"]
	terminators = [
	tokenizer.eos_token_id,
	tokenizer.convert_tokens_to_ids("<\|eot_id\|>"),
	]


	@spaces.GPU
	def generate_instruction_response():
	instruction = pipeline(
	extract_input,
	max_new_tokens=2048,
	eos_token_id=terminators,
	do_sample=True,
	temperature=1,
	top_p=1,
	)

	sanitized_instruction = instruction[0]["generated_text"][
	len(extract_input) :
	].split("\n")[0]

	response_template = f"""<\|begin_of_text\|><\|start_header_id\|>user<\|end_header_id\|>\n\n{sanitized_instruction}<\|eot_id\|><\|start_header_id\|>assistant<\|end_header_id\|>\n\n"""

	response = pipeline(
	response_template,
	max_new_tokens=2048,
	eos_token_id=terminators,
	do_sample=True,
	temperature=1,
	top_p=1,
	)

	user_message = sanitized_instruction
	assistant_response = response[0]["generated_text"][len(response_template) :]

	return user_message, assistant_response


	title = "Magpie demo"
	description = """
	This Gradio demo showcases the approach described in the Magpie paper. Magpie is a data synthesis pipeline that creates high-quality alignment data without relying on prompt engineering or seed questions. Instead, it generates instruction data by prompting aligned LLMs with a pre-query template. This method does not prompt the model with a question or starting query. Instead, it uses the model's pre-query template to generate instructions. Essentially, the model is given only the template until a user instruction starts, and then it generates the instruction and the response.

	In this demo, you can see how the model generates a user instruction and a model response.

	You can learn more about the approach [in the paper](https://huggingface.co/papers/2406.08464).
	"""
	# Create the Gradio interface
	iface = gr.Interface(
	fn=generate_instruction_response,
	inputs=[],
	outputs=[
	gr.Text(label="Generated User Instruction"),
	gr.Markdown(label="Generated Model Response"),
	],
	title=title,
	description=description,
	)

	# Launch the app
	iface.launch(debug=True)