{
  "best_metric": 0.5666666666666667,
  "best_model_checkpoint": "MCG-NJU/videomae-large/checkpoint-140",
  "epoch": 31.013636363636362,
  "eval_steps": 500,
  "global_step": 220,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 1.1363636363636365e-05,
      "loss": 2.6619,
      "step": 5
    },
    {
      "epoch": 0.03,
      "eval_accuracy": 0.0,
      "eval_loss": 2.7016749382019043,
      "eval_runtime": 5.569,
      "eval_samples_per_second": 5.387,
      "eval_steps_per_second": 0.359,
      "step": 7
    },
    {
      "epoch": 1.01,
      "learning_rate": 2.272727272727273e-05,
      "loss": 2.6232,
      "step": 10
    },
    {
      "epoch": 1.03,
      "eval_accuracy": 0.0,
      "eval_loss": 2.6628258228302,
      "eval_runtime": 5.363,
      "eval_samples_per_second": 5.594,
      "eval_steps_per_second": 0.373,
      "step": 14
    },
    {
      "epoch": 2.0,
      "learning_rate": 3.409090909090909e-05,
      "loss": 2.5419,
      "step": 15
    },
    {
      "epoch": 2.03,
      "learning_rate": 4.545454545454546e-05,
      "loss": 2.381,
      "step": 20
    },
    {
      "epoch": 2.03,
      "eval_accuracy": 0.16666666666666666,
      "eval_loss": 2.5797576904296875,
      "eval_runtime": 5.6099,
      "eval_samples_per_second": 5.348,
      "eval_steps_per_second": 0.357,
      "step": 21
    },
    {
      "epoch": 3.02,
      "learning_rate": 4.9242424242424245e-05,
      "loss": 2.2215,
      "step": 25
    },
    {
      "epoch": 3.03,
      "eval_accuracy": 0.16666666666666666,
      "eval_loss": 2.4757392406463623,
      "eval_runtime": 5.5008,
      "eval_samples_per_second": 5.454,
      "eval_steps_per_second": 0.364,
      "step": 28
    },
    {
      "epoch": 4.01,
      "learning_rate": 4.797979797979798e-05,
      "loss": 1.8738,
      "step": 30
    },
    {
      "epoch": 4.03,
      "learning_rate": 4.671717171717172e-05,
      "loss": 1.7389,
      "step": 35
    },
    {
      "epoch": 4.03,
      "eval_accuracy": 0.23333333333333334,
      "eval_loss": 2.363579511642456,
      "eval_runtime": 6.4613,
      "eval_samples_per_second": 4.643,
      "eval_steps_per_second": 0.31,
      "step": 35
    },
    {
      "epoch": 5.02,
      "learning_rate": 4.545454545454546e-05,
      "loss": 1.3366,
      "step": 40
    },
    {
      "epoch": 5.03,
      "eval_accuracy": 0.3,
      "eval_loss": 2.2424137592315674,
      "eval_runtime": 5.7626,
      "eval_samples_per_second": 5.206,
      "eval_steps_per_second": 0.347,
      "step": 42
    },
    {
      "epoch": 6.01,
      "learning_rate": 4.41919191919192e-05,
      "loss": 1.1946,
      "step": 45
    },
    {
      "epoch": 6.03,
      "eval_accuracy": 0.3,
      "eval_loss": 2.167524814605713,
      "eval_runtime": 5.8137,
      "eval_samples_per_second": 5.16,
      "eval_steps_per_second": 0.344,
      "step": 49
    },
    {
      "epoch": 7.0,
      "learning_rate": 4.292929292929293e-05,
      "loss": 0.8832,
      "step": 50
    },
    {
      "epoch": 7.03,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.6809,
      "step": 55
    },
    {
      "epoch": 7.03,
      "eval_accuracy": 0.36666666666666664,
      "eval_loss": 2.0548174381256104,
      "eval_runtime": 5.6032,
      "eval_samples_per_second": 5.354,
      "eval_steps_per_second": 0.357,
      "step": 56
    },
    {
      "epoch": 8.02,
      "learning_rate": 4.0404040404040405e-05,
      "loss": 0.5255,
      "step": 60
    },
    {
      "epoch": 8.03,
      "eval_accuracy": 0.4,
      "eval_loss": 2.0410492420196533,
      "eval_runtime": 5.8803,
      "eval_samples_per_second": 5.102,
      "eval_steps_per_second": 0.34,
      "step": 63
    },
    {
      "epoch": 9.01,
      "learning_rate": 3.9141414141414145e-05,
      "loss": 0.4759,
      "step": 65
    },
    {
      "epoch": 9.03,
      "learning_rate": 3.787878787878788e-05,
      "loss": 0.3285,
      "step": 70
    },
    {
      "epoch": 9.03,
      "eval_accuracy": 0.4,
      "eval_loss": 1.9539462327957153,
      "eval_runtime": 6.0204,
      "eval_samples_per_second": 4.983,
      "eval_steps_per_second": 0.332,
      "step": 70
    },
    {
      "epoch": 10.02,
      "learning_rate": 3.661616161616162e-05,
      "loss": 0.2849,
      "step": 75
    },
    {
      "epoch": 10.03,
      "eval_accuracy": 0.4666666666666667,
      "eval_loss": 1.8536347150802612,
      "eval_runtime": 5.2379,
      "eval_samples_per_second": 5.727,
      "eval_steps_per_second": 0.382,
      "step": 77
    },
    {
      "epoch": 11.01,
      "learning_rate": 3.535353535353535e-05,
      "loss": 0.1832,
      "step": 80
    },
    {
      "epoch": 11.03,
      "eval_accuracy": 0.43333333333333335,
      "eval_loss": 1.8293204307556152,
      "eval_runtime": 5.7575,
      "eval_samples_per_second": 5.211,
      "eval_steps_per_second": 0.347,
      "step": 84
    },
    {
      "epoch": 12.0,
      "learning_rate": 3.409090909090909e-05,
      "loss": 0.1485,
      "step": 85
    },
    {
      "epoch": 12.03,
      "learning_rate": 3.282828282828283e-05,
      "loss": 0.1307,
      "step": 90
    },
    {
      "epoch": 12.03,
      "eval_accuracy": 0.4,
      "eval_loss": 1.8200174570083618,
      "eval_runtime": 5.6546,
      "eval_samples_per_second": 5.305,
      "eval_steps_per_second": 0.354,
      "step": 91
    },
    {
      "epoch": 13.02,
      "learning_rate": 3.1565656565656566e-05,
      "loss": 0.0901,
      "step": 95
    },
    {
      "epoch": 13.03,
      "eval_accuracy": 0.4,
      "eval_loss": 1.8354666233062744,
      "eval_runtime": 5.7638,
      "eval_samples_per_second": 5.205,
      "eval_steps_per_second": 0.347,
      "step": 98
    },
    {
      "epoch": 14.01,
      "learning_rate": 3.0303030303030306e-05,
      "loss": 0.0757,
      "step": 100
    },
    {
      "epoch": 14.03,
      "learning_rate": 2.904040404040404e-05,
      "loss": 0.0636,
      "step": 105
    },
    {
      "epoch": 14.03,
      "eval_accuracy": 0.43333333333333335,
      "eval_loss": 1.8200985193252563,
      "eval_runtime": 5.0989,
      "eval_samples_per_second": 5.884,
      "eval_steps_per_second": 0.392,
      "step": 105
    },
    {
      "epoch": 15.02,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.0413,
      "step": 110
    },
    {
      "epoch": 15.03,
      "eval_accuracy": 0.4666666666666667,
      "eval_loss": 1.7749541997909546,
      "eval_runtime": 5.2291,
      "eval_samples_per_second": 5.737,
      "eval_steps_per_second": 0.382,
      "step": 112
    },
    {
      "epoch": 16.01,
      "learning_rate": 2.6515151515151516e-05,
      "loss": 0.0427,
      "step": 115
    },
    {
      "epoch": 16.03,
      "eval_accuracy": 0.5333333333333333,
      "eval_loss": 1.745997667312622,
      "eval_runtime": 5.2765,
      "eval_samples_per_second": 5.686,
      "eval_steps_per_second": 0.379,
      "step": 119
    },
    {
      "epoch": 17.0,
      "learning_rate": 2.5252525252525256e-05,
      "loss": 0.0369,
      "step": 120
    },
    {
      "epoch": 17.03,
      "learning_rate": 2.398989898989899e-05,
      "loss": 0.0254,
      "step": 125
    },
    {
      "epoch": 17.03,
      "eval_accuracy": 0.5333333333333333,
      "eval_loss": 1.7804018259048462,
      "eval_runtime": 5.5848,
      "eval_samples_per_second": 5.372,
      "eval_steps_per_second": 0.358,
      "step": 126
    },
    {
      "epoch": 18.02,
      "learning_rate": 2.272727272727273e-05,
      "loss": 0.0203,
      "step": 130
    },
    {
      "epoch": 18.03,
      "eval_accuracy": 0.43333333333333335,
      "eval_loss": 1.8868685960769653,
      "eval_runtime": 5.6579,
      "eval_samples_per_second": 5.302,
      "eval_steps_per_second": 0.353,
      "step": 133
    },
    {
      "epoch": 19.01,
      "learning_rate": 2.1464646464646466e-05,
      "loss": 0.0231,
      "step": 135
    },
    {
      "epoch": 19.03,
      "learning_rate": 2.0202020202020203e-05,
      "loss": 0.0174,
      "step": 140
    },
    {
      "epoch": 19.03,
      "eval_accuracy": 0.5666666666666667,
      "eval_loss": 1.7740839719772339,
      "eval_runtime": 5.4112,
      "eval_samples_per_second": 5.544,
      "eval_steps_per_second": 0.37,
      "step": 140
    },
    {
      "epoch": 20.02,
      "learning_rate": 1.893939393939394e-05,
      "loss": 0.0154,
      "step": 145
    },
    {
      "epoch": 20.03,
      "eval_accuracy": 0.5333333333333333,
      "eval_loss": 1.7400553226470947,
      "eval_runtime": 5.673,
      "eval_samples_per_second": 5.288,
      "eval_steps_per_second": 0.353,
      "step": 147
    },
    {
      "epoch": 21.01,
      "learning_rate": 1.7676767676767676e-05,
      "loss": 0.0136,
      "step": 150
    },
    {
      "epoch": 21.03,
      "eval_accuracy": 0.5,
      "eval_loss": 1.7672396898269653,
      "eval_runtime": 5.5489,
      "eval_samples_per_second": 5.406,
      "eval_steps_per_second": 0.36,
      "step": 154
    },
    {
      "epoch": 22.0,
      "learning_rate": 1.6414141414141416e-05,
      "loss": 0.0123,
      "step": 155
    },
    {
      "epoch": 22.03,
      "learning_rate": 1.5151515151515153e-05,
      "loss": 0.0116,
      "step": 160
    },
    {
      "epoch": 22.03,
      "eval_accuracy": 0.5333333333333333,
      "eval_loss": 1.7792834043502808,
      "eval_runtime": 5.7051,
      "eval_samples_per_second": 5.258,
      "eval_steps_per_second": 0.351,
      "step": 161
    },
    {
      "epoch": 23.02,
      "learning_rate": 1.388888888888889e-05,
      "loss": 0.0123,
      "step": 165
    },
    {
      "epoch": 23.03,
      "eval_accuracy": 0.4666666666666667,
      "eval_loss": 1.8018161058425903,
      "eval_runtime": 5.5773,
      "eval_samples_per_second": 5.379,
      "eval_steps_per_second": 0.359,
      "step": 168
    },
    {
      "epoch": 24.01,
      "learning_rate": 1.2626262626262628e-05,
      "loss": 0.0093,
      "step": 170
    },
    {
      "epoch": 24.03,
      "learning_rate": 1.1363636363636365e-05,
      "loss": 0.0102,
      "step": 175
    },
    {
      "epoch": 24.03,
      "eval_accuracy": 0.5,
      "eval_loss": 1.8023875951766968,
      "eval_runtime": 5.4661,
      "eval_samples_per_second": 5.488,
      "eval_steps_per_second": 0.366,
      "step": 175
    },
    {
      "epoch": 25.02,
      "learning_rate": 1.0101010101010101e-05,
      "loss": 0.0103,
      "step": 180
    },
    {
      "epoch": 25.03,
      "eval_accuracy": 0.5,
      "eval_loss": 1.8057912588119507,
      "eval_runtime": 5.5758,
      "eval_samples_per_second": 5.38,
      "eval_steps_per_second": 0.359,
      "step": 182
    },
    {
      "epoch": 26.01,
      "learning_rate": 8.838383838383838e-06,
      "loss": 0.0089,
      "step": 185
    },
    {
      "epoch": 26.03,
      "eval_accuracy": 0.5,
      "eval_loss": 1.810552954673767,
      "eval_runtime": 5.6149,
      "eval_samples_per_second": 5.343,
      "eval_steps_per_second": 0.356,
      "step": 189
    },
    {
      "epoch": 27.0,
      "learning_rate": 7.5757575757575764e-06,
      "loss": 0.009,
      "step": 190
    },
    {
      "epoch": 27.03,
      "learning_rate": 6.313131313131314e-06,
      "loss": 0.0088,
      "step": 195
    },
    {
      "epoch": 27.03,
      "eval_accuracy": 0.5,
      "eval_loss": 1.8028618097305298,
      "eval_runtime": 5.3559,
      "eval_samples_per_second": 5.601,
      "eval_steps_per_second": 0.373,
      "step": 196
    },
    {
      "epoch": 28.02,
      "learning_rate": 5.050505050505051e-06,
      "loss": 0.0092,
      "step": 200
    },
    {
      "epoch": 28.03,
      "eval_accuracy": 0.5,
      "eval_loss": 1.7960565090179443,
      "eval_runtime": 5.2538,
      "eval_samples_per_second": 5.71,
      "eval_steps_per_second": 0.381,
      "step": 203
    },
    {
      "epoch": 29.01,
      "learning_rate": 3.7878787878787882e-06,
      "loss": 0.0082,
      "step": 205
    },
    {
      "epoch": 29.03,
      "learning_rate": 2.5252525252525253e-06,
      "loss": 0.0083,
      "step": 210
    },
    {
      "epoch": 29.03,
      "eval_accuracy": 0.5,
      "eval_loss": 1.7939893007278442,
      "eval_runtime": 5.0992,
      "eval_samples_per_second": 5.883,
      "eval_steps_per_second": 0.392,
      "step": 210
    },
    {
      "epoch": 30.02,
      "learning_rate": 1.2626262626262627e-06,
      "loss": 0.0099,
      "step": 215
    },
    {
      "epoch": 30.03,
      "eval_accuracy": 0.5,
      "eval_loss": 1.7922049760818481,
      "eval_runtime": 5.3556,
      "eval_samples_per_second": 5.602,
      "eval_steps_per_second": 0.373,
      "step": 217
    },
    {
      "epoch": 31.01,
      "learning_rate": 0.0,
      "loss": 0.0085,
      "step": 220
    },
    {
      "epoch": 31.01,
      "eval_accuracy": 0.5,
      "eval_loss": 1.7919764518737793,
      "eval_runtime": 5.1737,
      "eval_samples_per_second": 5.799,
      "eval_steps_per_second": 0.387,
      "step": 220
    },
    {
      "epoch": 31.01,
      "step": 220,
      "total_flos": 1.5320910961010737e+19,
      "train_loss": 0.5185655888847329,
      "train_runtime": 1238.2272,
      "train_samples_per_second": 2.843,
      "train_steps_per_second": 0.178
    },
    {
      "epoch": 31.01,
      "eval_accuracy": 0.42857142857142855,
      "eval_loss": 1.504156231880188,
      "eval_runtime": 2.3481,
      "eval_samples_per_second": 5.962,
      "eval_steps_per_second": 0.426,
      "step": 220
    },
    {
      "epoch": 31.01,
      "eval_accuracy": 0.42857142857142855,
      "eval_loss": 1.504156231880188,
      "eval_runtime": 2.2938,
      "eval_samples_per_second": 6.104,
      "eval_steps_per_second": 0.436,
      "step": 220
    }
  ],
  "logging_steps": 5,
  "max_steps": 220,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 500,
  "total_flos": 1.5320910961010737e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}