bstraehle committed on
Commit
0db656a
·
verified ·
1 Parent(s): 5eedbaa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -34
app.py CHANGED
@@ -29,23 +29,23 @@ def process(action, base_model_name, ft_model_name, dataset_name, system_prompt,
29
  def fine_tune_model(base_model_name, dataset_name):
30
  # Load dataset
31
 
32
- dataset = load_dataset(dataset_name)
33
 
34
- print("### Dataset")
35
- print(dataset)
36
- print("### Example")
37
- print(dataset["train"][:1])
38
- print("###")
39
 
40
  # Load model
41
 
42
- model, tokenizer = load_model(base_model_name)
43
 
44
- print("### Model")
45
- print(model)
46
- print("### Tokenizer")
47
- print(tokenizer)
48
- print("###")
49
 
50
  # Pre-process dataset
51
 
@@ -53,26 +53,26 @@ def fine_tune_model(base_model_name, dataset_name):
53
  model_inputs = tokenizer(examples["sql_prompt"], text_target=examples["sql"], max_length=512, padding="max_length", truncation=True)
54
  return model_inputs
55
 
56
- dataset = dataset.map(preprocess, batched=True)
57
 
58
- print("### Pre-processed dataset")
59
- print(dataset)
60
- print("### Example")
61
- print(dataset["train"][:1])
62
- print("###")
63
 
64
  # Split dataset into training and validation sets
65
 
66
- #train_dataset = dataset["train"]
67
- #test_dataset = dataset["test"]
68
- train_dataset = dataset["train"].shuffle(seed=42).select(range(1000))
69
- test_dataset = dataset["test"].shuffle(seed=42).select(range(100))
70
 
71
- print("### Training dataset")
72
- print(train_dataset)
73
- print("### Validation dataset")
74
- print(test_dataset)
75
- print("###")
76
 
77
  # Configure training arguments
78
 
@@ -102,13 +102,13 @@ def fine_tune_model(base_model_name, dataset_name):
102
 
103
  # Create trainer
104
 
105
- trainer = Seq2SeqTrainer(
106
- model=model,
107
- args=training_args,
108
- train_dataset=train_dataset,
109
- eval_dataset=test_dataset,
110
- #compute_metrics=lambda pred: {"accuracy": torch.sum(pred.label_ids == pred.predictions.argmax(-1))},
111
- )
112
 
113
  # Train model
114
 
 
29
  def fine_tune_model(base_model_name, dataset_name):
30
  # Load dataset
31
 
32
+ #dataset = load_dataset(dataset_name)
33
 
34
+ #print("### Dataset")
35
+ #print(dataset)
36
+ #print("### Example")
37
+ #print(dataset["train"][:1])
38
+ #print("###")
39
 
40
  # Load model
41
 
42
+ #model, tokenizer = load_model(base_model_name)
43
 
44
+ #print("### Model")
45
+ #print(model)
46
+ #print("### Tokenizer")
47
+ #print(tokenizer)
48
+ #print("###")
49
 
50
  # Pre-process dataset
51
 
 
53
  model_inputs = tokenizer(examples["sql_prompt"], text_target=examples["sql"], max_length=512, padding="max_length", truncation=True)
54
  return model_inputs
55
 
56
+ #dataset = dataset.map(preprocess, batched=True)
57
 
58
+ #print("### Pre-processed dataset")
59
+ #print(dataset)
60
+ #print("### Example")
61
+ #print(dataset["train"][:1])
62
+ #print("###")
63
 
64
  # Split dataset into training and validation sets
65
 
66
+ ##train_dataset = dataset["train"]
67
+ ##test_dataset = dataset["test"]
68
+ #train_dataset = dataset["train"].shuffle(seed=42).select(range(1000))
69
+ #test_dataset = dataset["test"].shuffle(seed=42).select(range(100))
70
 
71
+ #print("### Training dataset")
72
+ #print(train_dataset)
73
+ #print("### Validation dataset")
74
+ #print(test_dataset)
75
+ #print("###")
76
 
77
  # Configure training arguments
78
 
 
102
 
103
  # Create trainer
104
 
105
+ #trainer = Seq2SeqTrainer(
106
+ # model=model,
107
+ # args=training_args,
108
+ # train_dataset=train_dataset,
109
+ # eval_dataset=test_dataset,
110
+ # #compute_metrics=lambda pred: {"accuracy": torch.sum(pred.label_ids == pred.predictions.argmax(-1))},
111
+ #)
112
 
113
  # Train model
114