---
license: apache-2.0
base_model: togethercomputer/RedPajama-INCITE-Base-3B-v1
datasets:
- johnrobinsn/alpaca-cleaned
tags:
- lora
- alpaca
- peft
- redpajama
---

# RedPajama-3B-instruct-lora

This is an instruction fine-tuned model of https://huggingface.co/togethercomputer/RedPajama-INCITE-Base-3B-v1, using `int8` mixed training.

## Training dataset

Cleaned version of alpaca from https://huggingface.co/datasets/johnrobinsn/alpaca-cleaned.

## How to use

```Python
from huggingface_hub import model_info, hf_hub_download
from peft import LoraConfig, get_peft_model, set_peft_model_state_dict, TaskType
from textwrap import dedent
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_id = "pcuenq/RedPajama-3B-instruct-lora"

# Load base model
info = model_info(model_id)
base_model = info.cardData["base_model"]
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    load_in_8bit=True,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Prepare for LoRA
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)
model = get_peft_model(model, lora_config)

# Download and apply LoRA weights
lora_filename = hf_hub_download(repo_id=model_id, filename="lora.bin")
lora_dict = torch.load(lora_filename)
set_peft_model_state_dict(model, lora_dict)

# Run inference
def generate_prompt(instruction, inputs=None):
    if inputs is not None:
        return dedent(
            f"""\
            Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

            ### Instruction:
            {instruction}

            ### Input:
            {inputs}

            ### Response:
            """
        )
    else:
        return dedent(
            f"""\
            Below is an instruction that describes a task. Write a response that appropriately completes the request.

            ### Instruction:
            {instruction}

            ### Response:
            """
        )

prompt = generate_prompt("Has humankind ever set foot on the Moon?")

inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
input_length = inputs.input_ids.shape[1]
outputs = model.generate(
    **inputs,
    max_new_tokens=50,
    do_sample=True,
    temperature=1.0,
    top_p=0.7,
    top_k=50,
    return_dict_in_generate=True
)

tokens = outputs.sequences[0, input_length:]
# Strip everything from the first EOS token onwards
eos_pos = (tokens == tokenizer.eos_token_id).nonzero()
if eos_pos.numel() > 0:
    tokens = tokens[:eos_pos[0].item()]

output_str = tokenizer.decode(tokens)
print(output_str)
```