Salvatore committed on
Commit
14a0015
·
verified ·
1 Parent(s): df72630

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +6 -6
  2. test_results.json +6 -7
  3. trainer_state.json +101 -142
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 3.22972972972973,
3
- "eval_accuracy": 0.9161290322580645,
4
- "eval_loss": 0.37556546926498413,
5
  "eval_model_preparation_time": 0.0017,
6
- "eval_runtime": 9.9597,
7
- "eval_samples_per_second": 15.563,
8
- "eval_steps_per_second": 2.008
9
  }
 
1
  {
2
+ "epoch": 3.84,
3
+ "eval_accuracy": 0.23684210526315788,
4
+ "eval_loss": 1.4163174629211426,
5
  "eval_model_preparation_time": 0.0017,
6
+ "eval_runtime": 4.1873,
7
+ "eval_samples_per_second": 18.15,
8
+ "eval_steps_per_second": 2.388
9
  }
test_results.json CHANGED
@@ -1,9 +1,8 @@
1
  {
2
- "epoch": 3.22972972972973,
3
- "eval_accuracy": 0.9161290322580645,
4
- "eval_loss": 0.37556546926498413,
5
- "eval_model_preparation_time": 0.0017,
6
- "eval_runtime": 9.9597,
7
- "eval_samples_per_second": 15.563,
8
- "eval_steps_per_second": 2.008
9
  }
 
1
  {
2
+ "epoch": 3.84,
3
+ "eval_accuracy": 0.23684210526315788,
4
+ "eval_loss": 1.4163174629211426,
5
+ "eval_runtime": 4.1873,
6
+ "eval_samples_per_second": 18.15,
7
+ "eval_steps_per_second": 2.388
 
8
  }
trainer_state.json CHANGED
@@ -1,185 +1,144 @@
1
  {
2
- "best_metric": 0.7714285714285715,
3
- "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-114",
4
- "epoch": 3.22972972972973,
5
  "eval_steps": 500,
6
- "global_step": 148,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.06756756756756757,
13
- "grad_norm": 8.911906242370605,
14
- "learning_rate": 3.3333333333333335e-05,
15
- "loss": 2.2939,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.13513513513513514,
20
- "grad_norm": 7.693809509277344,
21
- "learning_rate": 4.81203007518797e-05,
22
- "loss": 2.1937,
23
  "step": 20
24
  },
25
  {
26
- "epoch": 0.20270270270270271,
27
- "grad_norm": 7.856507778167725,
28
- "learning_rate": 4.43609022556391e-05,
29
- "loss": 2.1284,
30
- "step": 30
 
 
31
  },
32
  {
33
- "epoch": 0.25675675675675674,
34
- "eval_accuracy": 0.42857142857142855,
35
- "eval_loss": 1.8425960540771484,
36
- "eval_model_preparation_time": 0.0017,
37
- "eval_runtime": 4.7396,
38
- "eval_samples_per_second": 14.769,
39
- "eval_steps_per_second": 1.899,
40
- "step": 38
41
  },
42
  {
43
- "epoch": 1.0135135135135136,
44
- "grad_norm": 8.737997055053711,
45
- "learning_rate": 4.0601503759398494e-05,
46
- "loss": 1.9138,
47
  "step": 40
48
  },
49
  {
50
- "epoch": 1.0810810810810811,
51
- "grad_norm": 9.056952476501465,
52
- "learning_rate": 3.6842105263157895e-05,
53
- "loss": 1.3965,
 
 
 
 
 
 
 
 
 
54
  "step": 50
55
  },
56
  {
57
- "epoch": 1.1486486486486487,
58
- "grad_norm": 9.419984817504883,
59
- "learning_rate": 3.3082706766917295e-05,
60
- "loss": 1.0866,
61
  "step": 60
62
  },
63
  {
64
- "epoch": 1.2162162162162162,
65
- "grad_norm": 5.51899528503418,
66
- "learning_rate": 2.9323308270676693e-05,
67
- "loss": 0.8904,
68
  "step": 70
69
  },
70
  {
71
- "epoch": 1.2567567567567568,
72
- "eval_accuracy": 0.7428571428571429,
73
- "eval_loss": 0.7788430452346802,
74
- "eval_model_preparation_time": 0.0017,
75
- "eval_runtime": 4.6999,
76
- "eval_samples_per_second": 14.894,
77
- "eval_steps_per_second": 1.915,
78
- "step": 76
79
  },
80
  {
81
- "epoch": 2.027027027027027,
82
- "grad_norm": 6.201491832733154,
83
- "learning_rate": 2.556390977443609e-05,
84
- "loss": 0.6935,
85
  "step": 80
86
  },
87
  {
88
- "epoch": 2.0945945945945947,
89
- "grad_norm": 12.898451805114746,
90
- "learning_rate": 2.1804511278195487e-05,
91
- "loss": 0.5538,
92
  "step": 90
93
  },
94
  {
95
- "epoch": 2.1621621621621623,
96
- "grad_norm": 9.50394058227539,
97
- "learning_rate": 1.8045112781954888e-05,
98
- "loss": 0.5005,
99
- "step": 100
100
- },
101
- {
102
- "epoch": 2.22972972972973,
103
- "grad_norm": 10.287944793701172,
104
- "learning_rate": 1.4285714285714285e-05,
105
- "loss": 0.3888,
106
- "step": 110
107
- },
108
- {
109
- "epoch": 2.2567567567567566,
110
- "eval_accuracy": 0.7714285714285715,
111
- "eval_loss": 0.4577937126159668,
112
- "eval_model_preparation_time": 0.0017,
113
- "eval_runtime": 4.8277,
114
- "eval_samples_per_second": 14.5,
115
- "eval_steps_per_second": 1.864,
116
- "step": 114
117
- },
118
- {
119
- "epoch": 3.0405405405405403,
120
- "grad_norm": 5.723540306091309,
121
- "learning_rate": 1.0526315789473684e-05,
122
- "loss": 0.3792,
123
- "step": 120
124
- },
125
- {
126
- "epoch": 3.108108108108108,
127
- "grad_norm": 3.536975383758545,
128
- "learning_rate": 6.766917293233083e-06,
129
- "loss": 0.2719,
130
- "step": 130
131
- },
132
- {
133
- "epoch": 3.175675675675676,
134
- "grad_norm": 6.4476141929626465,
135
- "learning_rate": 3.007518796992481e-06,
136
- "loss": 0.2734,
137
- "step": 140
138
- },
139
- {
140
- "epoch": 3.22972972972973,
141
- "eval_accuracy": 0.7571428571428571,
142
- "eval_loss": 0.39962273836135864,
143
- "eval_model_preparation_time": 0.0017,
144
- "eval_runtime": 4.7855,
145
- "eval_samples_per_second": 14.627,
146
- "eval_steps_per_second": 1.881,
147
- "step": 148
148
- },
149
- {
150
- "epoch": 3.22972972972973,
151
- "step": 148,
152
- "total_flos": 1.460491890402263e+18,
153
- "train_loss": 1.0221337974071503,
154
- "train_runtime": 165.2475,
155
- "train_samples_per_second": 7.165,
156
- "train_steps_per_second": 0.896
157
- },
158
- {
159
- "epoch": 3.22972972972973,
160
- "eval_accuracy": 0.9161290322580645,
161
- "eval_loss": 0.3755654990673065,
162
- "eval_model_preparation_time": 0.0017,
163
- "eval_runtime": 9.8886,
164
- "eval_samples_per_second": 15.675,
165
- "eval_steps_per_second": 2.023,
166
- "step": 148
167
- },
168
- {
169
- "epoch": 3.22972972972973,
170
- "eval_accuracy": 0.9161290322580645,
171
- "eval_loss": 0.37556546926498413,
172
- "eval_model_preparation_time": 0.0017,
173
- "eval_runtime": 9.9597,
174
- "eval_samples_per_second": 15.563,
175
- "eval_steps_per_second": 2.008,
176
- "step": 148
177
  }
178
  ],
179
  "logging_steps": 10,
180
- "max_steps": 148,
181
  "num_input_tokens_seen": 0,
182
- "num_train_epochs": 9223372036854775807,
183
  "save_steps": 500,
184
  "stateful_callbacks": {
185
  "TrainerControl": {
@@ -193,7 +152,7 @@
193
  "attributes": {}
194
  }
195
  },
196
- "total_flos": 1.460491890402263e+18,
197
  "train_batch_size": 8,
198
  "trial_name": null,
199
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.2625,
3
+ "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-25",
4
+ "epoch": 3.84,
5
  "eval_steps": 500,
6
+ "global_step": 96,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.4,
13
+ "grad_norm": 9.724103927612305,
14
+ "learning_rate": 5e-05,
15
+ "loss": 1.4798,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.8,
20
+ "grad_norm": 8.079227447509766,
21
+ "learning_rate": 4.418604651162791e-05,
22
+ "loss": 1.4528,
23
  "step": 20
24
  },
25
  {
26
+ "epoch": 1.0,
27
+ "eval_accuracy": 0.2625,
28
+ "eval_loss": 1.4395983219146729,
29
+ "eval_runtime": 5.2554,
30
+ "eval_samples_per_second": 15.222,
31
+ "eval_steps_per_second": 1.903,
32
+ "step": 25
33
  },
34
  {
35
+ "epoch": 1.2,
36
+ "grad_norm": 3.919217824935913,
37
+ "learning_rate": 3.837209302325582e-05,
38
+ "loss": 1.4901,
39
+ "step": 30
 
 
 
40
  },
41
  {
42
+ "epoch": 1.6,
43
+ "grad_norm": 1.8412970304489136,
44
+ "learning_rate": 3.2558139534883724e-05,
45
+ "loss": 1.4273,
46
  "step": 40
47
  },
48
  {
49
+ "epoch": 2.0,
50
+ "grad_norm": 7.860177993774414,
51
+ "learning_rate": 2.674418604651163e-05,
52
+ "loss": 1.4244,
53
+ "step": 50
54
+ },
55
+ {
56
+ "epoch": 2.0,
57
+ "eval_accuracy": 0.25,
58
+ "eval_loss": 1.4033204317092896,
59
+ "eval_runtime": 4.221,
60
+ "eval_samples_per_second": 18.953,
61
+ "eval_steps_per_second": 2.369,
62
  "step": 50
63
  },
64
  {
65
+ "epoch": 2.4,
66
+ "grad_norm": 5.914783000946045,
67
+ "learning_rate": 2.0930232558139536e-05,
68
+ "loss": 1.4058,
69
  "step": 60
70
  },
71
  {
72
+ "epoch": 2.8,
73
+ "grad_norm": 3.5916202068328857,
74
+ "learning_rate": 1.5116279069767441e-05,
75
+ "loss": 1.4495,
76
  "step": 70
77
  },
78
  {
79
+ "epoch": 3.0,
80
+ "eval_accuracy": 0.25,
81
+ "eval_loss": 1.3919236660003662,
82
+ "eval_runtime": 4.5151,
83
+ "eval_samples_per_second": 17.718,
84
+ "eval_steps_per_second": 2.215,
85
+ "step": 75
 
86
  },
87
  {
88
+ "epoch": 3.2,
89
+ "grad_norm": 5.373912811279297,
90
+ "learning_rate": 9.302325581395349e-06,
91
+ "loss": 1.4098,
92
  "step": 80
93
  },
94
  {
95
+ "epoch": 3.6,
96
+ "grad_norm": 5.40514612197876,
97
+ "learning_rate": 3.488372093023256e-06,
98
+ "loss": 1.3912,
99
  "step": 90
100
  },
101
  {
102
+ "epoch": 3.84,
103
+ "eval_accuracy": 0.25,
104
+ "eval_loss": 1.3884985446929932,
105
+ "eval_runtime": 4.4577,
106
+ "eval_samples_per_second": 17.947,
107
+ "eval_steps_per_second": 2.243,
108
+ "step": 96
109
+ },
110
+ {
111
+ "epoch": 3.84,
112
+ "step": 96,
113
+ "total_flos": 9.420416318601953e+17,
114
+ "train_loss": 1.4338702062765758,
115
+ "train_runtime": 108.4417,
116
+ "train_samples_per_second": 7.082,
117
+ "train_steps_per_second": 0.885
118
+ },
119
+ {
120
+ "epoch": 3.84,
121
+ "eval_accuracy": 0.23684210526315788,
122
+ "eval_loss": 1.4163174629211426,
123
+ "eval_runtime": 4.7313,
124
+ "eval_samples_per_second": 16.063,
125
+ "eval_steps_per_second": 2.114,
126
+ "step": 96
127
+ },
128
+ {
129
+ "epoch": 3.84,
130
+ "eval_accuracy": 0.23684210526315788,
131
+ "eval_loss": 1.4163174629211426,
132
+ "eval_runtime": 4.1873,
133
+ "eval_samples_per_second": 18.15,
134
+ "eval_steps_per_second": 2.388,
135
+ "step": 96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  }
137
  ],
138
  "logging_steps": 10,
139
+ "max_steps": 96,
140
  "num_input_tokens_seen": 0,
141
+ "num_train_epochs": 4,
142
  "save_steps": 500,
143
  "stateful_callbacks": {
144
  "TrainerControl": {
 
152
  "attributes": {}
153
  }
154
  },
155
+ "total_flos": 9.420416318601953e+17,
156
  "train_batch_size": 8,
157
  "trial_name": null,
158
  "trial_params": null