# llama/app.py — Gradio Space for fine-tuning a QA model
# (originally committed as "Create app.py" by Dorn4449, commit a30b42a)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from datasets import load_dataset
# Map of human-readable dataset names (shown in the UI dropdown) to their
# Hugging Face Hub dataset IDs. Several original values were shorthand IDs
# that do not exist on the Hub ("nq", "triviaqa", "obqa") and would make
# load_dataset() fail; corrected to the canonical Hub IDs.
datasets_info = {
    "SQuAD": "squad",
    "SQuAD 2.0": "squad_v2",
    "Natural Questions": "natural_questions",
    "TriviaQA": "trivia_qa",
    "QuAC": "quac",
    "FAQ Dataset": "faq",  # NOTE(review): no canonical "faq" dataset on the Hub — verify this ID
    "BoolQ": "boolq",
    "Open Book QA": "openbookqa",
}
# Load model and tokenizer directly
# NOTE(review): this is a 70B-parameter checkpoint — loading it in full
# precision with no device_map/quantization needs on the order of 140+ GB
# of memory; confirm the host (e.g. a free Space) can actually hold it.
tokenizer = AutoTokenizer.from_pretrained("nvidia/Llama-3.1-Nemotron-70B-Instruct-HF")
model = AutoModelForCausalLM.from_pretrained("nvidia/Llama-3.1-Nemotron-70B-Instruct-HF")
def train_model(dataset_name):
    """Fine-tune the global causal LM on the selected QA dataset and save it.

    Args:
        dataset_name: Key into ``datasets_info`` chosen from the UI dropdown.

    Returns:
        A status message string for the Gradio output textbox.
    """
    # Load the dataset from the Hugging Face Hub.
    dataset = load_dataset(datasets_info[dataset_name])

    # Llama tokenizers ship without a pad token; one is required so the
    # Trainer's collator can batch variable-length examples.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Tokenization. Causal-LM fine-tuning needs a 'labels' column — the
    # original code produced none, so Trainer.train() had no loss to optimize.
    # Padding to a fixed length keeps the default collator happy.
    # NOTE(review): assumes every dataset exposes 'question'/'context'
    # columns; some here (e.g. BoolQ uses 'passage') do not — verify per dataset.
    def preprocess_function(examples):
        tokenized = tokenizer(
            examples['question'],
            examples['context'],
            truncation=True,
            padding='max_length',
            max_length=512,
        )
        tokenized['labels'] = [list(ids) for ids in tokenized['input_ids']]
        return tokenized

    tokenized_dataset = dataset.map(preprocess_function, batched=True)

    # Not every dataset ships a validation split; only evaluate when one
    # exists (the original indexed 'validation' unconditionally → KeyError).
    has_validation = 'validation' in tokenized_dataset

    # Fine-tune the model
    training_args = TrainingArguments(
        output_dir=f"./{dataset_name}_model",
        evaluation_strategy="epoch" if has_validation else "no",
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=3,
        weight_decay=0.01,
        logging_dir='./logs',
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset['train'],
        eval_dataset=tokenized_dataset['validation'] if has_validation else None,
    )
    trainer.train()

    # Save the fine-tuned weights and tokenizer side by side so the output
    # directory is directly loadable with from_pretrained().
    model.save_pretrained(f"./{dataset_name}_model")
    tokenizer.save_pretrained(f"./{dataset_name}_model")
    return f"Model trained and saved for {dataset_name}!"
# Gradio Interface: a dropdown of dataset names, a button that kicks off
# training, and a textbox that shows the resulting status message.
with gr.Blocks() as demo:
    gr.Markdown("## Train QA Model on Multiple Datasets")
    dataset_choice = gr.Dropdown(choices=list(datasets_info.keys()), label="Select Dataset")
    train_button = gr.Button("Train Model")
    output = gr.Textbox(label="Output")

    # train_model already has the (input) -> str shape the click handler
    # needs, so it is wired up directly instead of through a wrapper.
    train_button.click(train_model, inputs=dataset_choice, outputs=output)

demo.launch()