File size: 2,904 Bytes
cac8fe7 ab12a97 cac8fe7 ab12a97 cac8fe7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import random
from os import path
from argparse import ArgumentParser
import torch
from torch.cuda import is_available as cuda_is_available
from model import GPT, GPTWithLoRA
from data import Alpaca
import tiktoken
def main():
    """Run an interactive text-generation session from the command line.

    Parses the command-line options, loads the model checkpoint (and,
    when ``--lora_path`` is given, a LoRA checkpoint that is merged into
    the model), then repeatedly asks for a prompt on stdin and streams
    the generated text to stdout until the user declines to continue.

    Raises:
        RuntimeError: If the requested device contains "cuda" but CUDA is
            not available.
    """
    # Local import: only the streaming decoder in the loop below needs it.
    import codecs

    parser = ArgumentParser(
        description="Generate text from the model given a prompt.",
    )

    parser.add_argument("--checkpoint_path", default="./out/checkpoint.pt", type=str)
    parser.add_argument("--lora_path", default=None, type=str)
    parser.add_argument("--max_tokens", default=1000, type=int)
    parser.add_argument("--temperature", default=1.0, type=float)
    parser.add_argument("--top_k", default=500, type=int)
    parser.add_argument("--top_p", default=0.9, type=float)
    parser.add_argument("--device", default="cuda", type=str)
    parser.add_argument("--seed", default=None, type=int)

    args = parser.parse_args()

    if "cuda" in args.device and not cuda_is_available():
        raise RuntimeError("Cuda is not available.")

    torch.set_float32_matmul_precision("high")

    # Compare against None rather than relying on truthiness so that an
    # explicit `--seed 0` still seeds the generators.
    if args.seed is not None:
        torch.manual_seed(args.seed)
        random.seed(args.seed)

    checkpoint = torch.load(
        args.checkpoint_path, map_location=args.device, weights_only=True
    )

    tokenizer = tiktoken.get_encoding(checkpoint["token_encoding"])

    model = GPT(**checkpoint["model_args"])

    # NOTE(review): the model is compiled *before* the weights are loaded;
    # presumably the checkpoint's keys were saved from a compiled module and
    # depend on this order - confirm against the training script.
    model = torch.compile(model)

    model.load_state_dict(checkpoint["model"])

    print("Model checkpoint loaded")

    if args.lora_path:
        checkpoint = torch.load(
            args.lora_path, map_location=args.device, weights_only=True
        )

        model = GPTWithLoRA(model, **checkpoint["lora_args"])

        model = torch.compile(model)

        # strict=False: presumably the LoRA checkpoint holds only the
        # adapter weights, not the full model - TODO confirm.
        model.load_state_dict(checkpoint["lora"], strict=False)

        model.merge_lora_parameters()

        print("LoRA checkpoint loaded")

    model.to(args.device)

    model.eval()

    while True:
        prompt = input("Enter a prompt: ")

        # With a LoRA checkpoint the raw prompt is wrapped in one of the
        # Alpaca prompt templates, with or without extra context.
        if args.lora_path:
            context = input("Additional context (leave blank for none): ")

            if len(context) > 0:
                prompt = Alpaca.PROMPT_TEMPLATE_WITH_INPUT.format(
                    input=context, instruction=prompt
                )
            else:
                prompt = Alpaca.PROMPT_TEMPLATE.format(instruction=prompt)

        prompt = tokenizer.encode_ordinary(prompt)

        prompt = torch.tensor(prompt, dtype=torch.int64, device=args.device)

        # A single token may carry only part of a multi-byte UTF-8 character.
        # Decoding each token's bytes on its own would print replacement
        # characters for such fragments, so an incremental decoder buffers
        # incomplete sequences until the remaining bytes arrive. A fresh
        # decoder per generation keeps state from leaking between prompts.
        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")

        for token in model.generate(
            prompt, args.max_tokens, args.temperature, args.top_k, args.top_p
        ):
            out = decoder.decode(tokenizer.decode_single_token_bytes(token))

            print(out, end="", flush=True)

        # Flush whatever is still buffered; a sequence left incomplete at the
        # end of generation is emitted as a replacement character.
        print(decoder.decode(b"", final=True), end="", flush=True)

        print("\n")

        # Any reply containing the letter "y" (case-insensitive) continues.
        if "y" not in input("Go again? (yes|no): ").lower():
            break
# Start the interactive session only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|