import sys
from typing import Union

import gradio as gr
import torch
from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizer


# Base checkpoint: a 15M-parameter LLaMA used for this arithmetic demo.
base_model = "nickypro/tinyllama-15M"


class Prompter(object):
    """Builds the prompt fed to the model and parses its output."""

    def generate_prompt(
        self,
        instruction: str,
        label: Union[None, str] = None,
    ) -> str:
        # The prompt is simply the question followed by an "Answer: " cue
        # for the model to complete; the label is appended during training.
        res = f"{instruction}\nAnswer: "
        if label:
            res = f"{res}{label}"
        return res

    def get_response(self, output: str) -> str:
        # Keep only the text after "Answer:" and display ASCII operators as
        # their typographic equivalents (÷, ×).
        return output.split("Answer:")[1].strip().replace("/", "\u00F7").replace("*", "\u00D7")
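
# Illustrative only (the instruction below is a made-up example, not from a
# dataset): Prompter().generate_prompt("12 + 34") returns "12 + 34\nAnswer: ",
# and Prompter().get_response("12 + 34\nAnswer: 46") returns "46".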

# Load the base model, then apply the LoRA adapter weights on top of it.
model = LlamaForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float32,
    device_map="auto",
)
model = PeftModel.from_pretrained(
    model,
    "checkpoint-16000",
    torch_dtype=torch.float32,
)

model.eval()
if torch.__version__ >= "2" and sys.platform != "win32":
    model = torch.compile(model)

# Use the reference LLaMA tokenizer; pad with token id 0 and pad on the left,
# which keeps prompts adjacent to the generated tokens when batching.
tokenizer = LlamaTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
tokenizer.pad_token_id = 0
tokenizer.padding_side = "left"
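
# Illustrative only: a minimal sanity check of the loaded model and tokenizer
# (the instruction is a made-up example; output quality depends on training).
#
#   ids = tokenizer("1 + 1\nAnswer: ", return_tensors="pt").input_ids
#   print(tokenizer.decode(model.generate(input_ids=ids, max_new_tokens=8)[0]))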


def generate_answers(instructions, model, tokenizer):
    """Generate an answer for each instruction and return the decoded texts."""
    prompter = Prompter()
    raw_answers = []

    for instruction in instructions:
        prompt = prompter.generate_prompt(instruction)
        inputs = tokenizer(prompt, return_tensors="pt")
        # Move the prompt onto the same device as the model.
        input_ids = inputs["input_ids"].to(model.device)

        generation_output = model.generate(
            input_ids=input_ids,
            return_dict_in_generate=True,
            output_scores=True,
            pad_token_id=0,
            eos_token_id=tokenizer.eos_token_id,
            max_new_tokens=16,
        )
        s = generation_output.sequences[0]
        raw_answers.append(tokenizer.decode(s, skip_special_tokens=True).strip())

    return raw_answers
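
# Illustrative only (made-up instructions): generate_answers returns the full
# decoded texts, prompt included, e.g.
#
#   generate_answers(["2 + 3", "10 + 7"], model, tokenizer)
#   # -> ["2 + 3\nAnswer: 5", "10 + 7\nAnswer: 17"]   (if the model is correct)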


def evaluate(instruction):
    # Thin wrapper used as the Gradio callback: one instruction in, one answer out.
    return generate_answers([instruction], model, tokenizer)[0]
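
# Note that evaluate returns the raw decoded text (prompt plus completion);
# Prompter().get_response could be applied to keep only the answer, but the
# interface below displays the full generation as-is.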


if __name__ == "__main__":
    gr.Interface(
        fn=evaluate,
        inputs=[
            gr.components.Textbox(
                lines=1,
                label="Arithmetic",
                placeholder="63303235 + 20239503",
            )
        ],
        outputs=[
            gr.Textbox(
                lines=5,
                label="Output",
            )
        ],
        title="Arithmetic LLaMA",
        description="A 15M-parameter LLaMA model fine-tuned on a+b addition tasks.",
    ).queue().launch(server_name="0.0.0.0", share=True)