liuhaotian's picture
Upload checkpoints
2a0a833
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 12.0,
"global_step": 1200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 5.555555555555555e-07,
"loss": 1.4551,
"step": 1
},
{
"epoch": 0.02,
"learning_rate": 1.111111111111111e-06,
"loss": 1.5146,
"step": 2
},
{
"epoch": 0.03,
"learning_rate": 1.6666666666666667e-06,
"loss": 1.4561,
"step": 3
},
{
"epoch": 0.04,
"learning_rate": 2.222222222222222e-06,
"loss": 1.4561,
"step": 4
},
{
"epoch": 0.05,
"learning_rate": 2.7777777777777783e-06,
"loss": 1.4873,
"step": 5
},
{
"epoch": 0.06,
"learning_rate": 3.3333333333333333e-06,
"loss": 1.416,
"step": 6
},
{
"epoch": 0.07,
"learning_rate": 3.88888888888889e-06,
"loss": 1.4082,
"step": 7
},
{
"epoch": 0.08,
"learning_rate": 4.444444444444444e-06,
"loss": 1.2451,
"step": 8
},
{
"epoch": 0.09,
"learning_rate": 5e-06,
"loss": 1.2852,
"step": 9
},
{
"epoch": 0.1,
"learning_rate": 5.555555555555557e-06,
"loss": 1.1455,
"step": 10
},
{
"epoch": 0.11,
"learning_rate": 6.111111111111112e-06,
"loss": 1.1514,
"step": 11
},
{
"epoch": 0.12,
"learning_rate": 6.666666666666667e-06,
"loss": 1.1055,
"step": 12
},
{
"epoch": 0.13,
"learning_rate": 7.222222222222223e-06,
"loss": 0.9419,
"step": 13
},
{
"epoch": 0.14,
"learning_rate": 7.77777777777778e-06,
"loss": 0.9639,
"step": 14
},
{
"epoch": 0.15,
"learning_rate": 8.333333333333334e-06,
"loss": 0.8989,
"step": 15
},
{
"epoch": 0.16,
"learning_rate": 8.888888888888888e-06,
"loss": 0.8335,
"step": 16
},
{
"epoch": 0.17,
"learning_rate": 9.444444444444445e-06,
"loss": 0.8276,
"step": 17
},
{
"epoch": 0.18,
"learning_rate": 1e-05,
"loss": 0.7036,
"step": 18
},
{
"epoch": 0.19,
"learning_rate": 1.0555555555555557e-05,
"loss": 0.6025,
"step": 19
},
{
"epoch": 0.2,
"learning_rate": 1.1111111111111113e-05,
"loss": 0.5479,
"step": 20
},
{
"epoch": 0.21,
"learning_rate": 1.1666666666666668e-05,
"loss": 0.5498,
"step": 21
},
{
"epoch": 0.22,
"learning_rate": 1.2222222222222224e-05,
"loss": 0.4973,
"step": 22
},
{
"epoch": 0.23,
"learning_rate": 1.2777777777777777e-05,
"loss": 0.4192,
"step": 23
},
{
"epoch": 0.24,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.3982,
"step": 24
},
{
"epoch": 0.25,
"learning_rate": 1.388888888888889e-05,
"loss": 0.4087,
"step": 25
},
{
"epoch": 0.26,
"learning_rate": 1.4444444444444446e-05,
"loss": 0.2972,
"step": 26
},
{
"epoch": 0.27,
"learning_rate": 1.5000000000000002e-05,
"loss": 0.2999,
"step": 27
},
{
"epoch": 0.28,
"learning_rate": 1.555555555555556e-05,
"loss": 0.2775,
"step": 28
},
{
"epoch": 0.29,
"learning_rate": 1.6111111111111115e-05,
"loss": 0.2011,
"step": 29
},
{
"epoch": 0.3,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.1993,
"step": 30
},
{
"epoch": 0.31,
"learning_rate": 1.7222222222222224e-05,
"loss": 0.2238,
"step": 31
},
{
"epoch": 0.32,
"learning_rate": 1.7777777777777777e-05,
"loss": 0.1837,
"step": 32
},
{
"epoch": 0.33,
"learning_rate": 1.8333333333333333e-05,
"loss": 0.1543,
"step": 33
},
{
"epoch": 0.34,
"learning_rate": 1.888888888888889e-05,
"loss": 0.1529,
"step": 34
},
{
"epoch": 0.35,
"learning_rate": 1.9444444444444445e-05,
"loss": 0.1431,
"step": 35
},
{
"epoch": 0.36,
"learning_rate": 2e-05,
"loss": 0.1694,
"step": 36
},
{
"epoch": 0.37,
"learning_rate": 1.999996357802219e-05,
"loss": 0.1388,
"step": 37
},
{
"epoch": 0.38,
"learning_rate": 1.9999854312354064e-05,
"loss": 0.1071,
"step": 38
},
{
"epoch": 0.39,
"learning_rate": 1.9999672203791564e-05,
"loss": 0.1104,
"step": 39
},
{
"epoch": 0.4,
"learning_rate": 1.9999417253661235e-05,
"loss": 0.0975,
"step": 40
},
{
"epoch": 0.41,
"learning_rate": 1.999908946382024e-05,
"loss": 0.1121,
"step": 41
},
{
"epoch": 0.42,
"learning_rate": 1.9998688836656322e-05,
"loss": 0.1176,
"step": 42
},
{
"epoch": 0.43,
"learning_rate": 1.9998215375087814e-05,
"loss": 0.0956,
"step": 43
},
{
"epoch": 0.44,
"learning_rate": 1.9997669082563597e-05,
"loss": 0.1043,
"step": 44
},
{
"epoch": 0.45,
"learning_rate": 1.999704996306308e-05,
"loss": 0.0894,
"step": 45
},
{
"epoch": 0.46,
"learning_rate": 1.9996358021096174e-05,
"loss": 0.0992,
"step": 46
},
{
"epoch": 0.47,
"learning_rate": 1.9995593261703263e-05,
"loss": 0.0901,
"step": 47
},
{
"epoch": 0.48,
"learning_rate": 1.9994755690455154e-05,
"loss": 0.0686,
"step": 48
},
{
"epoch": 0.49,
"learning_rate": 1.999384531345304e-05,
"loss": 0.0719,
"step": 49
},
{
"epoch": 0.5,
"learning_rate": 1.9992862137328475e-05,
"loss": 0.0771,
"step": 50
},
{
"epoch": 0.51,
"learning_rate": 1.9991806169243302e-05,
"loss": 0.0585,
"step": 51
},
{
"epoch": 0.52,
"learning_rate": 1.999067741688961e-05,
"loss": 0.0657,
"step": 52
},
{
"epoch": 0.53,
"learning_rate": 1.9989475888489674e-05,
"loss": 0.0706,
"step": 53
},
{
"epoch": 0.54,
"learning_rate": 1.998820159279591e-05,
"loss": 0.081,
"step": 54
},
{
"epoch": 0.55,
"learning_rate": 1.9986854539090783e-05,
"loss": 0.065,
"step": 55
},
{
"epoch": 0.56,
"learning_rate": 1.998543473718677e-05,
"loss": 0.0705,
"step": 56
},
{
"epoch": 0.57,
"learning_rate": 1.9983942197426272e-05,
"loss": 0.0624,
"step": 57
},
{
"epoch": 0.58,
"learning_rate": 1.998237693068153e-05,
"loss": 0.0581,
"step": 58
},
{
"epoch": 0.59,
"learning_rate": 1.9980738948354575e-05,
"loss": 0.0588,
"step": 59
},
{
"epoch": 0.6,
"learning_rate": 1.997902826237712e-05,
"loss": 0.0567,
"step": 60
},
{
"epoch": 0.61,
"learning_rate": 1.9977244885210467e-05,
"loss": 0.0595,
"step": 61
},
{
"epoch": 0.62,
"learning_rate": 1.997538882984545e-05,
"loss": 0.0624,
"step": 62
},
{
"epoch": 0.63,
"learning_rate": 1.9973460109802306e-05,
"loss": 0.0857,
"step": 63
},
{
"epoch": 0.64,
"learning_rate": 1.9971458739130598e-05,
"loss": 0.0722,
"step": 64
},
{
"epoch": 0.65,
"learning_rate": 1.99693847324091e-05,
"loss": 0.0621,
"step": 65
},
{
"epoch": 0.66,
"learning_rate": 1.9967238104745695e-05,
"loss": 0.0504,
"step": 66
},
{
"epoch": 0.67,
"learning_rate": 1.9965018871777272e-05,
"loss": 0.0327,
"step": 67
},
{
"epoch": 0.68,
"learning_rate": 1.99627270496696e-05,
"loss": 0.034,
"step": 68
},
{
"epoch": 0.69,
"learning_rate": 1.996036265511722e-05,
"loss": 0.0649,
"step": 69
},
{
"epoch": 0.7,
"learning_rate": 1.995792570534331e-05,
"loss": 0.0623,
"step": 70
},
{
"epoch": 0.71,
"learning_rate": 1.995541621809959e-05,
"loss": 0.0546,
"step": 71
},
{
"epoch": 0.72,
"learning_rate": 1.995283421166614e-05,
"loss": 0.0439,
"step": 72
},
{
"epoch": 0.73,
"learning_rate": 1.9950179704851332e-05,
"loss": 0.0399,
"step": 73
},
{
"epoch": 0.74,
"learning_rate": 1.994745271699163e-05,
"loss": 0.0524,
"step": 74
},
{
"epoch": 0.75,
"learning_rate": 1.9944653267951507e-05,
"loss": 0.0617,
"step": 75
},
{
"epoch": 0.76,
"learning_rate": 1.9941781378123244e-05,
"loss": 0.0532,
"step": 76
},
{
"epoch": 0.77,
"learning_rate": 1.993883706842683e-05,
"loss": 0.0571,
"step": 77
},
{
"epoch": 0.78,
"learning_rate": 1.993582036030978e-05,
"loss": 0.0505,
"step": 78
},
{
"epoch": 0.79,
"learning_rate": 1.9932731275746986e-05,
"loss": 0.0278,
"step": 79
},
{
"epoch": 0.8,
"learning_rate": 1.9929569837240567e-05,
"loss": 0.0337,
"step": 80
},
{
"epoch": 0.81,
"learning_rate": 1.9926336067819686e-05,
"loss": 0.0369,
"step": 81
},
{
"epoch": 0.82,
"learning_rate": 1.9923029991040405e-05,
"loss": 0.029,
"step": 82
},
{
"epoch": 0.83,
"learning_rate": 1.991965163098549e-05,
"loss": 0.0314,
"step": 83
},
{
"epoch": 0.84,
"learning_rate": 1.9916201012264255e-05,
"loss": 0.06,
"step": 84
},
{
"epoch": 0.85,
"learning_rate": 1.9912678160012367e-05,
"loss": 0.0442,
"step": 85
},
{
"epoch": 0.86,
"learning_rate": 1.9909083099891682e-05,
"loss": 0.0482,
"step": 86
},
{
"epoch": 0.87,
"learning_rate": 1.9905415858090036e-05,
"loss": 0.0448,
"step": 87
},
{
"epoch": 0.88,
"learning_rate": 1.990167646132107e-05,
"loss": 0.0414,
"step": 88
},
{
"epoch": 0.89,
"learning_rate": 1.9897864936824026e-05,
"loss": 0.0385,
"step": 89
},
{
"epoch": 0.9,
"learning_rate": 1.9893981312363563e-05,
"loss": 0.0367,
"step": 90
},
{
"epoch": 0.91,
"learning_rate": 1.989002561622953e-05,
"loss": 0.0314,
"step": 91
},
{
"epoch": 0.92,
"learning_rate": 1.9885997877236788e-05,
"loss": 0.0388,
"step": 92
},
{
"epoch": 0.93,
"learning_rate": 1.988189812472498e-05,
"loss": 0.0384,
"step": 93
},
{
"epoch": 0.94,
"learning_rate": 1.9877726388558325e-05,
"loss": 0.0344,
"step": 94
},
{
"epoch": 0.95,
"learning_rate": 1.9873482699125395e-05,
"loss": 0.0308,
"step": 95
},
{
"epoch": 0.96,
"learning_rate": 1.9869167087338908e-05,
"loss": 0.0502,
"step": 96
},
{
"epoch": 0.97,
"learning_rate": 1.9864779584635485e-05,
"loss": 0.0364,
"step": 97
},
{
"epoch": 0.98,
"learning_rate": 1.9860320222975435e-05,
"loss": 0.0369,
"step": 98
},
{
"epoch": 0.99,
"learning_rate": 1.9855789034842504e-05,
"loss": 0.0278,
"step": 99
},
{
"epoch": 1.0,
"learning_rate": 1.9851186053243667e-05,
"loss": 0.033,
"step": 100
},
{
"epoch": 1.01,
"learning_rate": 1.9846511311708857e-05,
"loss": 0.0205,
"step": 101
},
{
"epoch": 1.02,
"learning_rate": 1.9841764844290744e-05,
"loss": 0.0326,
"step": 102
},
{
"epoch": 1.03,
"learning_rate": 1.983694668556447e-05,
"loss": 0.0182,
"step": 103
},
{
"epoch": 1.04,
"learning_rate": 1.983205687062742e-05,
"loss": 0.0166,
"step": 104
},
{
"epoch": 1.05,
"learning_rate": 1.9827095435098926e-05,
"loss": 0.0178,
"step": 105
},
{
"epoch": 1.06,
"learning_rate": 1.9822062415120053e-05,
"loss": 0.0212,
"step": 106
},
{
"epoch": 1.07,
"learning_rate": 1.9816957847353315e-05,
"loss": 0.0219,
"step": 107
},
{
"epoch": 1.08,
"learning_rate": 1.9811781768982392e-05,
"loss": 0.0219,
"step": 108
},
{
"epoch": 1.09,
"learning_rate": 1.9806534217711893e-05,
"loss": 0.0197,
"step": 109
},
{
"epoch": 1.1,
"learning_rate": 1.9801215231767056e-05,
"loss": 0.0197,
"step": 110
},
{
"epoch": 1.11,
"learning_rate": 1.9795824849893483e-05,
"loss": 0.0235,
"step": 111
},
{
"epoch": 1.12,
"learning_rate": 1.9790363111356838e-05,
"loss": 0.024,
"step": 112
},
{
"epoch": 1.13,
"learning_rate": 1.9784830055942592e-05,
"loss": 0.015,
"step": 113
},
{
"epoch": 1.14,
"learning_rate": 1.977922572395571e-05,
"loss": 0.0134,
"step": 114
},
{
"epoch": 1.15,
"learning_rate": 1.9773550156220356e-05,
"loss": 0.0175,
"step": 115
},
{
"epoch": 1.16,
"learning_rate": 1.9767803394079618e-05,
"loss": 0.0165,
"step": 116
},
{
"epoch": 1.17,
"learning_rate": 1.976198547939518e-05,
"loss": 0.0298,
"step": 117
},
{
"epoch": 1.18,
"learning_rate": 1.975609645454704e-05,
"loss": 0.0181,
"step": 118
},
{
"epoch": 1.19,
"learning_rate": 1.9750136362433178e-05,
"loss": 0.017,
"step": 119
},
{
"epoch": 1.2,
"learning_rate": 1.9744105246469264e-05,
"loss": 0.0187,
"step": 120
},
{
"epoch": 1.21,
"learning_rate": 1.973800315058833e-05,
"loss": 0.0224,
"step": 121
},
{
"epoch": 1.22,
"learning_rate": 1.9731830119240465e-05,
"loss": 0.0144,
"step": 122
},
{
"epoch": 1.23,
"learning_rate": 1.972558619739246e-05,
"loss": 0.0133,
"step": 123
},
{
"epoch": 1.24,
"learning_rate": 1.971927143052752e-05,
"loss": 0.02,
"step": 124
},
{
"epoch": 1.25,
"learning_rate": 1.97128858646449e-05,
"loss": 0.0149,
"step": 125
},
{
"epoch": 1.26,
"learning_rate": 1.9706429546259592e-05,
"loss": 0.0088,
"step": 126
},
{
"epoch": 1.27,
"learning_rate": 1.969990252240197e-05,
"loss": 0.0167,
"step": 127
},
{
"epoch": 1.28,
"learning_rate": 1.9693304840617456e-05,
"loss": 0.0154,
"step": 128
},
{
"epoch": 1.29,
"learning_rate": 1.9686636548966177e-05,
"loss": 0.0152,
"step": 129
},
{
"epoch": 1.3,
"learning_rate": 1.967989769602261e-05,
"loss": 0.0142,
"step": 130
},
{
"epoch": 1.31,
"learning_rate": 1.967308833087522e-05,
"loss": 0.0166,
"step": 131
},
{
"epoch": 1.32,
"learning_rate": 1.9666208503126115e-05,
"loss": 0.0206,
"step": 132
},
{
"epoch": 1.33,
"learning_rate": 1.9659258262890683e-05,
"loss": 0.0182,
"step": 133
},
{
"epoch": 1.34,
"learning_rate": 1.965223766079723e-05,
"loss": 0.0167,
"step": 134
},
{
"epoch": 1.35,
"learning_rate": 1.964514674798659e-05,
"loss": 0.0171,
"step": 135
},
{
"epoch": 1.36,
"learning_rate": 1.963798557611178e-05,
"loss": 0.0255,
"step": 136
},
{
"epoch": 1.37,
"learning_rate": 1.9630754197337612e-05,
"loss": 0.024,
"step": 137
},
{
"epoch": 1.38,
"learning_rate": 1.9623452664340305e-05,
"loss": 0.0158,
"step": 138
},
{
"epoch": 1.39,
"learning_rate": 1.961608103030711e-05,
"loss": 0.0215,
"step": 139
},
{
"epoch": 1.4,
"learning_rate": 1.9608639348935938e-05,
"loss": 0.0177,
"step": 140
},
{
"epoch": 1.41,
"learning_rate": 1.960112767443493e-05,
"loss": 0.0153,
"step": 141
},
{
"epoch": 1.42,
"learning_rate": 1.9593546061522094e-05,
"loss": 0.0098,
"step": 142
},
{
"epoch": 1.43,
"learning_rate": 1.9585894565424903e-05,
"loss": 0.0206,
"step": 143
},
{
"epoch": 1.44,
"learning_rate": 1.957817324187987e-05,
"loss": 0.0198,
"step": 144
},
{
"epoch": 1.45,
"learning_rate": 1.9570382147132187e-05,
"loss": 0.0205,
"step": 145
},
{
"epoch": 1.46,
"learning_rate": 1.9562521337935255e-05,
"loss": 0.0165,
"step": 146
},
{
"epoch": 1.47,
"learning_rate": 1.955459087155033e-05,
"loss": 0.0131,
"step": 147
},
{
"epoch": 1.48,
"learning_rate": 1.9546590805746054e-05,
"loss": 0.0129,
"step": 148
},
{
"epoch": 1.49,
"learning_rate": 1.953852119879808e-05,
"loss": 0.0256,
"step": 149
},
{
"epoch": 1.5,
"learning_rate": 1.953038210948861e-05,
"loss": 0.0173,
"step": 150
},
{
"epoch": 1.51,
"learning_rate": 1.9522173597105997e-05,
"loss": 0.0176,
"step": 151
},
{
"epoch": 1.52,
"learning_rate": 1.9513895721444286e-05,
"loss": 0.0158,
"step": 152
},
{
"epoch": 1.53,
"learning_rate": 1.9505548542802805e-05,
"loss": 0.0111,
"step": 153
},
{
"epoch": 1.54,
"learning_rate": 1.9497132121985695e-05,
"loss": 0.0157,
"step": 154
},
{
"epoch": 1.55,
"learning_rate": 1.9488646520301505e-05,
"loss": 0.0167,
"step": 155
},
{
"epoch": 1.56,
"learning_rate": 1.9480091799562706e-05,
"loss": 0.0111,
"step": 156
},
{
"epoch": 1.57,
"learning_rate": 1.9471468022085273e-05,
"loss": 0.023,
"step": 157
},
{
"epoch": 1.58,
"learning_rate": 1.9462775250688208e-05,
"loss": 0.0145,
"step": 158
},
{
"epoch": 1.59,
"learning_rate": 1.9454013548693103e-05,
"loss": 0.0133,
"step": 159
},
{
"epoch": 1.6,
"learning_rate": 1.9445182979923657e-05,
"loss": 0.0195,
"step": 160
},
{
"epoch": 1.61,
"learning_rate": 1.943628360870522e-05,
"loss": 0.0153,
"step": 161
},
{
"epoch": 1.62,
"learning_rate": 1.9427315499864345e-05,
"loss": 0.0165,
"step": 162
},
{
"epoch": 1.63,
"learning_rate": 1.9418278718728272e-05,
"loss": 0.0166,
"step": 163
},
{
"epoch": 1.64,
"learning_rate": 1.94091733311245e-05,
"loss": 0.0155,
"step": 164
},
{
"epoch": 1.65,
"learning_rate": 1.9399999403380266e-05,
"loss": 0.0176,
"step": 165
},
{
"epoch": 1.66,
"learning_rate": 1.939075700232209e-05,
"loss": 0.0168,
"step": 166
},
{
"epoch": 1.67,
"learning_rate": 1.938144619527528e-05,
"loss": 0.0153,
"step": 167
},
{
"epoch": 1.68,
"learning_rate": 1.937206705006344e-05,
"loss": 0.015,
"step": 168
},
{
"epoch": 1.69,
"learning_rate": 1.9362619635007965e-05,
"loss": 0.0102,
"step": 169
},
{
"epoch": 1.7,
"learning_rate": 1.9353104018927568e-05,
"loss": 0.0161,
"step": 170
},
{
"epoch": 1.71,
"learning_rate": 1.9343520271137764e-05,
"loss": 0.016,
"step": 171
},
{
"epoch": 1.72,
"learning_rate": 1.933386846145036e-05,
"loss": 0.0089,
"step": 172
},
{
"epoch": 1.73,
"learning_rate": 1.9324148660172954e-05,
"loss": 0.0175,
"step": 173
},
{
"epoch": 1.74,
"learning_rate": 1.9314360938108427e-05,
"loss": 0.0171,
"step": 174
},
{
"epoch": 1.75,
"learning_rate": 1.930450536655441e-05,
"loss": 0.0248,
"step": 175
},
{
"epoch": 1.76,
"learning_rate": 1.9294582017302797e-05,
"loss": 0.0101,
"step": 176
},
{
"epoch": 1.77,
"learning_rate": 1.928459096263918e-05,
"loss": 0.0198,
"step": 177
},
{
"epoch": 1.78,
"learning_rate": 1.9274532275342355e-05,
"loss": 0.0145,
"step": 178
},
{
"epoch": 1.79,
"learning_rate": 1.926440602868378e-05,
"loss": 0.0159,
"step": 179
},
{
"epoch": 1.8,
"learning_rate": 1.9254212296427043e-05,
"loss": 0.0134,
"step": 180
},
{
"epoch": 1.81,
"learning_rate": 1.924395115282732e-05,
"loss": 0.0166,
"step": 181
},
{
"epoch": 1.82,
"learning_rate": 1.923362267263084e-05,
"loss": 0.0141,
"step": 182
},
{
"epoch": 1.83,
"learning_rate": 1.922322693107434e-05,
"loss": 0.0168,
"step": 183
},
{
"epoch": 1.84,
"learning_rate": 1.921276400388451e-05,
"loss": 0.0198,
"step": 184
},
{
"epoch": 1.85,
"learning_rate": 1.9202233967277454e-05,
"loss": 0.0154,
"step": 185
},
{
"epoch": 1.86,
"learning_rate": 1.9191636897958123e-05,
"loss": 0.0117,
"step": 186
},
{
"epoch": 1.87,
"learning_rate": 1.918097287311976e-05,
"loss": 0.0202,
"step": 187
},
{
"epoch": 1.88,
"learning_rate": 1.9170241970443344e-05,
"loss": 0.0174,
"step": 188
},
{
"epoch": 1.89,
"learning_rate": 1.9159444268097012e-05,
"loss": 0.0176,
"step": 189
},
{
"epoch": 1.9,
"learning_rate": 1.9148579844735497e-05,
"loss": 0.0123,
"step": 190
},
{
"epoch": 1.91,
"learning_rate": 1.9137648779499562e-05,
"loss": 0.0134,
"step": 191
},
{
"epoch": 1.92,
"learning_rate": 1.9126651152015404e-05,
"loss": 0.0185,
"step": 192
},
{
"epoch": 1.93,
"learning_rate": 1.9115587042394095e-05,
"loss": 0.0177,
"step": 193
},
{
"epoch": 1.94,
"learning_rate": 1.9104456531230986e-05,
"loss": 0.0172,
"step": 194
},
{
"epoch": 1.95,
"learning_rate": 1.9093259699605125e-05,
"loss": 0.012,
"step": 195
},
{
"epoch": 1.96,
"learning_rate": 1.9081996629078655e-05,
"loss": 0.0121,
"step": 196
},
{
"epoch": 1.97,
"learning_rate": 1.9070667401696248e-05,
"loss": 0.017,
"step": 197
},
{
"epoch": 1.98,
"learning_rate": 1.905927209998447e-05,
"loss": 0.0106,
"step": 198
},
{
"epoch": 1.99,
"learning_rate": 1.9047810806951207e-05,
"loss": 0.0173,
"step": 199
},
{
"epoch": 2.0,
"learning_rate": 1.9036283606085057e-05,
"loss": 0.0099,
"step": 200
},
{
"epoch": 2.01,
"learning_rate": 1.90246905813547e-05,
"loss": 0.0068,
"step": 201
},
{
"epoch": 2.02,
"learning_rate": 1.9013031817208325e-05,
"loss": 0.007,
"step": 202
},
{
"epoch": 2.03,
"learning_rate": 1.9001307398572976e-05,
"loss": 0.0074,
"step": 203
},
{
"epoch": 2.04,
"learning_rate": 1.8989517410853956e-05,
"loss": 0.0057,
"step": 204
},
{
"epoch": 2.05,
"learning_rate": 1.89776619399342e-05,
"loss": 0.0058,
"step": 205
},
{
"epoch": 2.06,
"learning_rate": 1.8965741072173647e-05,
"loss": 0.0051,
"step": 206
},
{
"epoch": 2.07,
"learning_rate": 1.8953754894408617e-05,
"loss": 0.0078,
"step": 207
},
{
"epoch": 2.08,
"learning_rate": 1.8941703493951163e-05,
"loss": 0.0067,
"step": 208
},
{
"epoch": 2.09,
"learning_rate": 1.8929586958588465e-05,
"loss": 0.0087,
"step": 209
},
{
"epoch": 2.1,
"learning_rate": 1.8917405376582144e-05,
"loss": 0.0057,
"step": 210
},
{
"epoch": 2.11,
"learning_rate": 1.8905158836667678e-05,
"loss": 0.0078,
"step": 211
},
{
"epoch": 2.12,
"learning_rate": 1.8892847428053692e-05,
"loss": 0.0052,
"step": 212
},
{
"epoch": 2.13,
"learning_rate": 1.8880471240421365e-05,
"loss": 0.0069,
"step": 213
},
{
"epoch": 2.14,
"learning_rate": 1.8868030363923747e-05,
"loss": 0.0083,
"step": 214
},
{
"epoch": 2.15,
"learning_rate": 1.8855524889185096e-05,
"loss": 0.0053,
"step": 215
},
{
"epoch": 2.16,
"learning_rate": 1.8842954907300236e-05,
"loss": 0.0058,
"step": 216
},
{
"epoch": 2.17,
"learning_rate": 1.8830320509833898e-05,
"loss": 0.0055,
"step": 217
},
{
"epoch": 2.18,
"learning_rate": 1.8817621788820017e-05,
"loss": 0.0065,
"step": 218
},
{
"epoch": 2.19,
"learning_rate": 1.880485883676111e-05,
"loss": 0.0061,
"step": 219
},
{
"epoch": 2.2,
"learning_rate": 1.8792031746627563e-05,
"loss": 0.0073,
"step": 220
},
{
"epoch": 2.21,
"learning_rate": 1.8779140611856977e-05,
"loss": 0.0068,
"step": 221
},
{
"epoch": 2.22,
"learning_rate": 1.876618552635348e-05,
"loss": 0.0093,
"step": 222
},
{
"epoch": 2.23,
"learning_rate": 1.875316658448703e-05,
"loss": 0.0064,
"step": 223
},
{
"epoch": 2.24,
"learning_rate": 1.874008388109276e-05,
"loss": 0.0056,
"step": 224
},
{
"epoch": 2.25,
"learning_rate": 1.8726937511470247e-05,
"loss": 0.0054,
"step": 225
},
{
"epoch": 2.26,
"learning_rate": 1.8713727571382857e-05,
"loss": 0.0078,
"step": 226
},
{
"epoch": 2.27,
"learning_rate": 1.870045415705701e-05,
"loss": 0.0093,
"step": 227
},
{
"epoch": 2.28,
"learning_rate": 1.8687117365181514e-05,
"loss": 0.0042,
"step": 228
},
{
"epoch": 2.29,
"learning_rate": 1.867371729290683e-05,
"loss": 0.0068,
"step": 229
},
{
"epoch": 2.3,
"learning_rate": 1.866025403784439e-05,
"loss": 0.0066,
"step": 230
},
{
"epoch": 2.31,
"learning_rate": 1.8646727698065865e-05,
"loss": 0.0063,
"step": 231
},
{
"epoch": 2.32,
"learning_rate": 1.863313837210247e-05,
"loss": 0.0063,
"step": 232
},
{
"epoch": 2.33,
"learning_rate": 1.8619486158944223e-05,
"loss": 0.0063,
"step": 233
},
{
"epoch": 2.34,
"learning_rate": 1.8605771158039253e-05,
"loss": 0.0071,
"step": 234
},
{
"epoch": 2.35,
"learning_rate": 1.859199346929305e-05,
"loss": 0.007,
"step": 235
},
{
"epoch": 2.36,
"learning_rate": 1.8578153193067746e-05,
"loss": 0.0068,
"step": 236
},
{
"epoch": 2.37,
"learning_rate": 1.8564250430181387e-05,
"loss": 0.0084,
"step": 237
},
{
"epoch": 2.38,
"learning_rate": 1.8550285281907198e-05,
"loss": 0.0054,
"step": 238
},
{
"epoch": 2.39,
"learning_rate": 1.8536257849972846e-05,
"loss": 0.0058,
"step": 239
},
{
"epoch": 2.4,
"learning_rate": 1.8522168236559693e-05,
"loss": 0.0067,
"step": 240
},
{
"epoch": 2.41,
"learning_rate": 1.8508016544302057e-05,
"loss": 0.0063,
"step": 241
},
{
"epoch": 2.42,
"learning_rate": 1.849380287628646e-05,
"loss": 0.0064,
"step": 242
},
{
"epoch": 2.43,
"learning_rate": 1.847952733605088e-05,
"loss": 0.0056,
"step": 243
},
{
"epoch": 2.44,
"learning_rate": 1.8465190027584007e-05,
"loss": 0.0064,
"step": 244
},
{
"epoch": 2.45,
"learning_rate": 1.8450791055324457e-05,
"loss": 0.0052,
"step": 245
},
{
"epoch": 2.46,
"learning_rate": 1.8436330524160048e-05,
"loss": 0.0052,
"step": 246
},
{
"epoch": 2.47,
"learning_rate": 1.8421808539427006e-05,
"loss": 0.0066,
"step": 247
},
{
"epoch": 2.48,
"learning_rate": 1.840722520690921e-05,
"loss": 0.0049,
"step": 248
},
{
"epoch": 2.49,
"learning_rate": 1.8392580632837423e-05,
"loss": 0.0055,
"step": 249
},
{
"epoch": 2.5,
"learning_rate": 1.837787492388852e-05,
"loss": 0.0062,
"step": 250
},
{
"epoch": 2.51,
"learning_rate": 1.8363108187184702e-05,
"loss": 0.0068,
"step": 251
},
{
"epoch": 2.52,
"learning_rate": 1.8348280530292712e-05,
"loss": 0.0067,
"step": 252
},
{
"epoch": 2.53,
"learning_rate": 1.833339206122308e-05,
"loss": 0.0067,
"step": 253
},
{
"epoch": 2.54,
"learning_rate": 1.831844288842929e-05,
"loss": 0.0056,
"step": 254
},
{
"epoch": 2.55,
"learning_rate": 1.8303433120807043e-05,
"loss": 0.0068,
"step": 255
},
{
"epoch": 2.56,
"learning_rate": 1.8288362867693414e-05,
"loss": 0.0062,
"step": 256
},
{
"epoch": 2.57,
"learning_rate": 1.8273232238866094e-05,
"loss": 0.0043,
"step": 257
},
{
"epoch": 2.58,
"learning_rate": 1.8258041344542567e-05,
"loss": 0.008,
"step": 258
},
{
"epoch": 2.59,
"learning_rate": 1.8242790295379315e-05,
"loss": 0.0077,
"step": 259
},
{
"epoch": 2.6,
"learning_rate": 1.8227479202471016e-05,
"loss": 0.0054,
"step": 260
},
{
"epoch": 2.61,
"learning_rate": 1.8212108177349722e-05,
"loss": 0.0069,
"step": 261
},
{
"epoch": 2.62,
"learning_rate": 1.819667733198406e-05,
"loss": 0.0076,
"step": 262
},
{
"epoch": 2.63,
"learning_rate": 1.818118677877842e-05,
"loss": 0.0046,
"step": 263
},
{
"epoch": 2.64,
"learning_rate": 1.816563663057211e-05,
"loss": 0.0066,
"step": 264
},
{
"epoch": 2.65,
"learning_rate": 1.8150027000638566e-05,
"loss": 0.0056,
"step": 265
},
{
"epoch": 2.66,
"learning_rate": 1.8134358002684504e-05,
"loss": 0.0081,
"step": 266
},
{
"epoch": 2.67,
"learning_rate": 1.8118629750849106e-05,
"loss": 0.0064,
"step": 267
},
{
"epoch": 2.68,
"learning_rate": 1.8102842359703177e-05,
"loss": 0.0057,
"step": 268
},
{
"epoch": 2.69,
"learning_rate": 1.808699594424832e-05,
"loss": 0.0062,
"step": 269
},
{
"epoch": 2.7,
"learning_rate": 1.8071090619916095e-05,
"loss": 0.0092,
"step": 270
},
{
"epoch": 2.71,
"learning_rate": 1.8055126502567172e-05,
"loss": 0.0089,
"step": 271
},
{
"epoch": 2.72,
"learning_rate": 1.8039103708490503e-05,
"loss": 0.0056,
"step": 272
},
{
"epoch": 2.73,
"learning_rate": 1.802302235440245e-05,
"loss": 0.0056,
"step": 273
},
{
"epoch": 2.74,
"learning_rate": 1.8006882557445965e-05,
"loss": 0.0044,
"step": 274
},
{
"epoch": 2.75,
"learning_rate": 1.7990684435189706e-05,
"loss": 0.0054,
"step": 275
},
{
"epoch": 2.76,
"learning_rate": 1.797442810562721e-05,
"loss": 0.0054,
"step": 276
},
{
"epoch": 2.77,
"learning_rate": 1.7958113687176006e-05,
"loss": 0.006,
"step": 277
},
{
"epoch": 2.78,
"learning_rate": 1.7941741298676777e-05,
"loss": 0.0055,
"step": 278
},
{
"epoch": 2.79,
"learning_rate": 1.7925311059392472e-05,
"loss": 0.006,
"step": 279
},
{
"epoch": 2.8,
"learning_rate": 1.790882308900746e-05,
"loss": 0.0061,
"step": 280
},
{
"epoch": 2.81,
"learning_rate": 1.7892277507626627e-05,
"loss": 0.0071,
"step": 281
},
{
"epoch": 2.82,
"learning_rate": 1.7875674435774546e-05,
"loss": 0.0052,
"step": 282
},
{
"epoch": 2.83,
"learning_rate": 1.785901399439455e-05,
"loss": 0.0053,
"step": 283
},
{
"epoch": 2.84,
"learning_rate": 1.7842296304847892e-05,
"loss": 0.0083,
"step": 284
},
{
"epoch": 2.85,
"learning_rate": 1.7825521488912833e-05,
"loss": 0.0072,
"step": 285
},
{
"epoch": 2.86,
"learning_rate": 1.7808689668783762e-05,
"loss": 0.0068,
"step": 286
},
{
"epoch": 2.87,
"learning_rate": 1.7791800967070324e-05,
"loss": 0.0049,
"step": 287
},
{
"epoch": 2.88,
"learning_rate": 1.7774855506796497e-05,
"loss": 0.0058,
"step": 288
},
{
"epoch": 2.89,
"learning_rate": 1.7757853411399714e-05,
"loss": 0.0042,
"step": 289
},
{
"epoch": 2.9,
"learning_rate": 1.7740794804729972e-05,
"loss": 0.0053,
"step": 290
},
{
"epoch": 2.91,
"learning_rate": 1.7723679811048904e-05,
"loss": 0.0043,
"step": 291
},
{
"epoch": 2.92,
"learning_rate": 1.7706508555028895e-05,
"loss": 0.0068,
"step": 292
},
{
"epoch": 2.93,
"learning_rate": 1.7689281161752164e-05,
"loss": 0.0078,
"step": 293
},
{
"epoch": 2.94,
"learning_rate": 1.767199775670986e-05,
"loss": 0.0079,
"step": 294
},
{
"epoch": 2.95,
"learning_rate": 1.7654658465801145e-05,
"loss": 0.0068,
"step": 295
},
{
"epoch": 2.96,
"learning_rate": 1.7637263415332272e-05,
"loss": 0.0051,
"step": 296
},
{
"epoch": 2.97,
"learning_rate": 1.7619812732015664e-05,
"loss": 0.0037,
"step": 297
},
{
"epoch": 2.98,
"learning_rate": 1.7602306542969006e-05,
"loss": 0.0045,
"step": 298
},
{
"epoch": 2.99,
"learning_rate": 1.75847449757143e-05,
"loss": 0.0055,
"step": 299
},
{
"epoch": 3.0,
"learning_rate": 1.7567128158176955e-05,
"loss": 0.0037,
"step": 300
},
{
"epoch": 3.01,
"learning_rate": 1.7549456218684833e-05,
"loss": 0.003,
"step": 301
},
{
"epoch": 3.02,
"learning_rate": 1.753172928596733e-05,
"loss": 0.0046,
"step": 302
},
{
"epoch": 3.03,
"learning_rate": 1.7513947489154443e-05,
"loss": 0.0035,
"step": 303
},
{
"epoch": 3.04,
"learning_rate": 1.749611095777581e-05,
"loss": 0.0035,
"step": 304
},
{
"epoch": 3.05,
"learning_rate": 1.7478219821759778e-05,
"loss": 0.0035,
"step": 305
},
{
"epoch": 3.06,
"learning_rate": 1.7460274211432463e-05,
"loss": 0.0048,
"step": 306
},
{
"epoch": 3.07,
"learning_rate": 1.7442274257516786e-05,
"loss": 0.0038,
"step": 307
},
{
"epoch": 3.08,
"learning_rate": 1.7424220091131536e-05,
"loss": 0.0041,
"step": 308
},
{
"epoch": 3.09,
"learning_rate": 1.74061118437904e-05,
"loss": 0.0046,
"step": 309
},
{
"epoch": 3.1,
"learning_rate": 1.738794964740101e-05,
"loss": 0.0042,
"step": 310
},
{
"epoch": 3.11,
"learning_rate": 1.7369733634264e-05,
"loss": 0.0044,
"step": 311
},
{
"epoch": 3.12,
"learning_rate": 1.7351463937072008e-05,
"loss": 0.0041,
"step": 312
},
{
"epoch": 3.13,
"learning_rate": 1.7333140688908733e-05,
"loss": 0.0045,
"step": 313
},
{
"epoch": 3.14,
"learning_rate": 1.731476402324796e-05,
"loss": 0.0035,
"step": 314
},
{
"epoch": 3.15,
"learning_rate": 1.7296334073952606e-05,
"loss": 0.0036,
"step": 315
},
{
"epoch": 3.16,
"learning_rate": 1.7277850975273694e-05,
"loss": 0.0038,
"step": 316
},
{
"epoch": 3.17,
"learning_rate": 1.7259314861849438e-05,
"loss": 0.0035,
"step": 317
},
{
"epoch": 3.18,
"learning_rate": 1.7240725868704218e-05,
"loss": 0.0047,
"step": 318
},
{
"epoch": 3.19,
"learning_rate": 1.7222084131247608e-05,
"loss": 0.0038,
"step": 319
},
{
"epoch": 3.2,
"learning_rate": 1.7203389785273402e-05,
"loss": 0.0032,
"step": 320
},
{
"epoch": 3.21,
"learning_rate": 1.718464296695861e-05,
"loss": 0.0031,
"step": 321
},
{
"epoch": 3.22,
"learning_rate": 1.716584381286247e-05,
"loss": 0.0028,
"step": 322
},
{
"epoch": 3.23,
"learning_rate": 1.714699245992546e-05,
"loss": 0.0029,
"step": 323
},
{
"epoch": 3.24,
"learning_rate": 1.7128089045468294e-05,
"loss": 0.0036,
"step": 324
},
{
"epoch": 3.25,
"learning_rate": 1.7109133707190913e-05,
"loss": 0.003,
"step": 325
},
{
"epoch": 3.26,
"learning_rate": 1.7090126583171503e-05,
"loss": 0.0034,
"step": 326
},
{
"epoch": 3.27,
"learning_rate": 1.7071067811865477e-05,
"loss": 0.0039,
"step": 327
},
{
"epoch": 3.28,
"learning_rate": 1.705195753210446e-05,
"loss": 0.0046,
"step": 328
},
{
"epoch": 3.29,
"learning_rate": 1.7032795883095287e-05,
"loss": 0.0031,
"step": 329
},
{
"epoch": 3.3,
"learning_rate": 1.7013583004418994e-05,
"loss": 0.0051,
"step": 330
},
{
"epoch": 3.31,
"learning_rate": 1.6994319036029786e-05,
"loss": 0.0035,
"step": 331
},
{
"epoch": 3.32,
"learning_rate": 1.697500411825403e-05,
"loss": 0.0036,
"step": 332
},
{
"epoch": 3.33,
"learning_rate": 1.695563839178923e-05,
"loss": 0.0044,
"step": 333
},
{
"epoch": 3.34,
"learning_rate": 1.693622199770299e-05,
"loss": 0.0035,
"step": 334
},
{
"epoch": 3.35,
"learning_rate": 1.6916755077432016e-05,
"loss": 0.0053,
"step": 335
},
{
"epoch": 3.36,
"learning_rate": 1.6897237772781046e-05,
"loss": 0.0068,
"step": 336
},
{
"epoch": 3.37,
"learning_rate": 1.6877670225921848e-05,
"loss": 0.0063,
"step": 337
},
{
"epoch": 3.38,
"learning_rate": 1.6858052579392182e-05,
"loss": 0.0032,
"step": 338
},
{
"epoch": 3.39,
"learning_rate": 1.6838384976094738e-05,
"loss": 0.0054,
"step": 339
},
{
"epoch": 3.4,
"learning_rate": 1.681866755929612e-05,
"loss": 0.0043,
"step": 340
},
{
"epoch": 3.41,
"learning_rate": 1.6798900472625793e-05,
"loss": 0.0041,
"step": 341
},
{
"epoch": 3.42,
"learning_rate": 1.6779083860075032e-05,
"loss": 0.0043,
"step": 342
},
{
"epoch": 3.43,
"learning_rate": 1.6759217865995884e-05,
"loss": 0.0044,
"step": 343
},
{
"epoch": 3.44,
"learning_rate": 1.673930263510011e-05,
"loss": 0.0033,
"step": 344
},
{
"epoch": 3.45,
"learning_rate": 1.6719338312458123e-05,
"loss": 0.0027,
"step": 345
},
{
"epoch": 3.46,
"learning_rate": 1.6699325043497957e-05,
"loss": 0.0031,
"step": 346
},
{
"epoch": 3.47,
"learning_rate": 1.667926297400417e-05,
"loss": 0.0034,
"step": 347
},
{
"epoch": 3.48,
"learning_rate": 1.665915225011681e-05,
"loss": 0.0037,
"step": 348
},
{
"epoch": 3.49,
"learning_rate": 1.6638993018330357e-05,
"loss": 0.0036,
"step": 349
},
{
"epoch": 3.5,
"learning_rate": 1.6618785425492618e-05,
"loss": 0.0033,
"step": 350
},
{
"epoch": 3.51,
"learning_rate": 1.65985296188037e-05,
"loss": 0.0034,
"step": 351
},
{
"epoch": 3.52,
"learning_rate": 1.6578225745814907e-05,
"loss": 0.0042,
"step": 352
},
{
"epoch": 3.53,
"learning_rate": 1.6557873954427684e-05,
"loss": 0.0031,
"step": 353
},
{
"epoch": 3.54,
"learning_rate": 1.6537474392892527e-05,
"loss": 0.004,
"step": 354
},
{
"epoch": 3.55,
"learning_rate": 1.651702720980791e-05,
"loss": 0.0043,
"step": 355
},
{
"epoch": 3.56,
"learning_rate": 1.6496532554119214e-05,
"loss": 0.0041,
"step": 356
},
{
"epoch": 3.57,
"learning_rate": 1.6475990575117603e-05,
"loss": 0.0037,
"step": 357
},
{
"epoch": 3.58,
"learning_rate": 1.6455401422438984e-05,
"loss": 0.0044,
"step": 358
},
{
"epoch": 3.59,
"learning_rate": 1.6434765246062893e-05,
"loss": 0.0033,
"step": 359
},
{
"epoch": 3.6,
"learning_rate": 1.6414082196311402e-05,
"loss": 0.0044,
"step": 360
},
{
"epoch": 3.61,
"learning_rate": 1.6393352423848016e-05,
"loss": 0.0032,
"step": 361
},
{
"epoch": 3.62,
"learning_rate": 1.637257607967661e-05,
"loss": 0.0048,
"step": 362
},
{
"epoch": 3.63,
"learning_rate": 1.6351753315140285e-05,
"loss": 0.0029,
"step": 363
},
{
"epoch": 3.64,
"learning_rate": 1.63308842819203e-05,
"loss": 0.0043,
"step": 364
},
{
"epoch": 3.65,
"learning_rate": 1.6309969132034947e-05,
"loss": 0.0043,
"step": 365
},
{
"epoch": 3.66,
"learning_rate": 1.6289008017838447e-05,
"loss": 0.0045,
"step": 366
},
{
"epoch": 3.67,
"learning_rate": 1.626800109201985e-05,
"loss": 0.0046,
"step": 367
},
{
"epoch": 3.68,
"learning_rate": 1.6246948507601915e-05,
"loss": 0.004,
"step": 368
},
{
"epoch": 3.69,
"learning_rate": 1.622585041793999e-05,
"loss": 0.0031,
"step": 369
},
{
"epoch": 3.7,
"learning_rate": 1.620470697672091e-05,
"loss": 0.0046,
"step": 370
},
{
"epoch": 3.71,
"learning_rate": 1.6183518337961864e-05,
"loss": 0.0031,
"step": 371
},
{
"epoch": 3.72,
"learning_rate": 1.6162284656009276e-05,
"loss": 0.0032,
"step": 372
},
{
"epoch": 3.73,
"learning_rate": 1.6141006085537683e-05,
"loss": 0.0026,
"step": 373
},
{
"epoch": 3.74,
"learning_rate": 1.6119682781548615e-05,
"loss": 0.0028,
"step": 374
},
{
"epoch": 3.75,
"learning_rate": 1.6098314899369446e-05,
"loss": 0.004,
"step": 375
},
{
"epoch": 3.76,
"learning_rate": 1.607690259465229e-05,
"loss": 0.0023,
"step": 376
},
{
"epoch": 3.77,
"learning_rate": 1.605544602337284e-05,
"loss": 0.0049,
"step": 377
},
{
"epoch": 3.78,
"learning_rate": 1.603394534182925e-05,
"loss": 0.0045,
"step": 378
},
{
"epoch": 3.79,
"learning_rate": 1.6012400706640986e-05,
"loss": 0.0036,
"step": 379
},
{
"epoch": 3.8,
"learning_rate": 1.5990812274747695e-05,
"loss": 0.004,
"step": 380
},
{
"epoch": 3.81,
"learning_rate": 1.5969180203408052e-05,
"loss": 0.0039,
"step": 381
},
{
"epoch": 3.82,
"learning_rate": 1.5947504650198628e-05,
"loss": 0.0035,
"step": 382
},
{
"epoch": 3.83,
"learning_rate": 1.592578577301272e-05,
"loss": 0.004,
"step": 383
},
{
"epoch": 3.84,
"learning_rate": 1.5904023730059227e-05,
"loss": 0.0047,
"step": 384
},
{
"epoch": 3.85,
"learning_rate": 1.5882218679861476e-05,
"loss": 0.0035,
"step": 385
},
{
"epoch": 3.86,
"learning_rate": 1.586037078125607e-05,
"loss": 0.0038,
"step": 386
},
{
"epoch": 3.87,
"learning_rate": 1.5838480193391753e-05,
"loss": 0.0038,
"step": 387
},
{
"epoch": 3.88,
"learning_rate": 1.5816547075728227e-05,
"loss": 0.0034,
"step": 388
},
{
"epoch": 3.89,
"learning_rate": 1.5794571588034992e-05,
"loss": 0.0032,
"step": 389
},
{
"epoch": 3.9,
"learning_rate": 1.5772553890390196e-05,
"loss": 0.0031,
"step": 390
},
{
"epoch": 3.91,
"learning_rate": 1.5750494143179456e-05,
"loss": 0.0025,
"step": 391
},
{
"epoch": 3.92,
"learning_rate": 1.57283925070947e-05,
"loss": 0.0034,
"step": 392
},
{
"epoch": 3.93,
"learning_rate": 1.5706249143132982e-05,
"loss": 0.0034,
"step": 393
},
{
"epoch": 3.94,
"learning_rate": 1.5684064212595332e-05,
"loss": 0.0036,
"step": 394
},
{
"epoch": 3.95,
"learning_rate": 1.5661837877085552e-05,
"loss": 0.0056,
"step": 395
},
{
"epoch": 3.96,
"learning_rate": 1.5639570298509067e-05,
"loss": 0.0035,
"step": 396
},
{
"epoch": 3.97,
"learning_rate": 1.5617261639071726e-05,
"loss": 0.0039,
"step": 397
},
{
"epoch": 3.98,
"learning_rate": 1.5594912061278627e-05,
"loss": 0.0045,
"step": 398
},
{
"epoch": 3.99,
"learning_rate": 1.5572521727932937e-05,
"loss": 0.0027,
"step": 399
},
{
"epoch": 4.0,
"learning_rate": 1.55500908021347e-05,
"loss": 0.0022,
"step": 400
},
{
"epoch": 4.01,
"learning_rate": 1.5527619447279657e-05,
"loss": 0.0029,
"step": 401
},
{
"epoch": 4.02,
"learning_rate": 1.5505107827058038e-05,
"loss": 0.0021,
"step": 402
},
{
"epoch": 4.03,
"learning_rate": 1.5482556105453392e-05,
"loss": 0.002,
"step": 403
},
{
"epoch": 4.04,
"learning_rate": 1.545996444674138e-05,
"loss": 0.0019,
"step": 404
},
{
"epoch": 4.05,
"learning_rate": 1.5437333015488586e-05,
"loss": 0.003,
"step": 405
},
{
"epoch": 4.06,
"learning_rate": 1.54146619765513e-05,
"loss": 0.0033,
"step": 406
},
{
"epoch": 4.07,
"learning_rate": 1.5391951495074342e-05,
"loss": 0.0031,
"step": 407
},
{
"epoch": 4.08,
"learning_rate": 1.536920173648984e-05,
"loss": 0.003,
"step": 408
},
{
"epoch": 4.09,
"learning_rate": 1.5346412866516032e-05,
"loss": 0.0023,
"step": 409
},
{
"epoch": 4.1,
"learning_rate": 1.532358505115607e-05,
"loss": 0.0033,
"step": 410
},
{
"epoch": 4.11,
"learning_rate": 1.530071845669678e-05,
"loss": 0.0031,
"step": 411
},
{
"epoch": 4.12,
"learning_rate": 1.5277813249707488e-05,
"loss": 0.0045,
"step": 412
},
{
"epoch": 4.13,
"learning_rate": 1.5254869597038781e-05,
"loss": 0.0024,
"step": 413
},
{
"epoch": 4.14,
"learning_rate": 1.52318876658213e-05,
"loss": 0.0023,
"step": 414
},
{
"epoch": 4.15,
"learning_rate": 1.5208867623464527e-05,
"loss": 0.0018,
"step": 415
},
{
"epoch": 4.16,
"learning_rate": 1.5185809637655548e-05,
"loss": 0.0018,
"step": 416
},
{
"epoch": 4.17,
"learning_rate": 1.516271387635786e-05,
"loss": 0.0029,
"step": 417
},
{
"epoch": 4.18,
"learning_rate": 1.5139580507810118e-05,
"loss": 0.0035,
"step": 418
},
{
"epoch": 4.19,
"learning_rate": 1.5116409700524934e-05,
"loss": 0.002,
"step": 419
},
{
"epoch": 4.2,
"learning_rate": 1.5093201623287631e-05,
"loss": 0.0027,
"step": 420
},
{
"epoch": 4.21,
"learning_rate": 1.5069956445155027e-05,
"loss": 0.0025,
"step": 421
},
{
"epoch": 4.22,
"learning_rate": 1.504667433545419e-05,
"loss": 0.0028,
"step": 422
},
{
"epoch": 4.23,
"learning_rate": 1.5023355463781221e-05,
"loss": 0.0022,
"step": 423
},
{
"epoch": 4.24,
"learning_rate": 1.5000000000000002e-05,
"loss": 0.0022,
"step": 424
},
{
"epoch": 4.25,
"learning_rate": 1.4976608114240972e-05,
"loss": 0.0016,
"step": 425
},
{
"epoch": 4.26,
"learning_rate": 1.4953179976899878e-05,
"loss": 0.0032,
"step": 426
},
{
"epoch": 4.27,
"learning_rate": 1.4929715758636541e-05,
"loss": 0.0034,
"step": 427
},
{
"epoch": 4.28,
"learning_rate": 1.4906215630373606e-05,
"loss": 0.0028,
"step": 428
},
{
"epoch": 4.29,
"learning_rate": 1.4882679763295307e-05,
"loss": 0.0023,
"step": 429
},
{
"epoch": 4.3,
"learning_rate": 1.4859108328846205e-05,
"loss": 0.003,
"step": 430
},
{
"epoch": 4.31,
"learning_rate": 1.4835501498729958e-05,
"loss": 0.0032,
"step": 431
},
{
"epoch": 4.32,
"learning_rate": 1.4811859444908053e-05,
"loss": 0.0023,
"step": 432
},
{
"epoch": 4.33,
"learning_rate": 1.4788182339598557e-05,
"loss": 0.0022,
"step": 433
},
{
"epoch": 4.34,
"learning_rate": 1.4764470355274877e-05,
"loss": 0.0024,
"step": 434
},
{
"epoch": 4.35,
"learning_rate": 1.4740723664664483e-05,
"loss": 0.0031,
"step": 435
},
{
"epoch": 4.36,
"learning_rate": 1.4716942440747663e-05,
"loss": 0.0022,
"step": 436
},
{
"epoch": 4.37,
"learning_rate": 1.469312685675626e-05,
"loss": 0.0018,
"step": 437
},
{
"epoch": 4.38,
"learning_rate": 1.4669277086172406e-05,
"loss": 0.0032,
"step": 438
},
{
"epoch": 4.39,
"learning_rate": 1.464539330272727e-05,
"loss": 0.0023,
"step": 439
},
{
"epoch": 4.4,
"learning_rate": 1.4621475680399771e-05,
"loss": 0.0027,
"step": 440
},
{
"epoch": 4.41,
"learning_rate": 1.4597524393415336e-05,
"loss": 0.0027,
"step": 441
},
{
"epoch": 4.42,
"learning_rate": 1.457353961624461e-05,
"loss": 0.0045,
"step": 442
},
{
"epoch": 4.43,
"learning_rate": 1.4549521523602198e-05,
"loss": 0.0022,
"step": 443
},
{
"epoch": 4.44,
"learning_rate": 1.4525470290445392e-05,
"loss": 0.0025,
"step": 444
},
{
"epoch": 4.45,
"learning_rate": 1.450138609197288e-05,
"loss": 0.0022,
"step": 445
},
{
"epoch": 4.46,
"learning_rate": 1.4477269103623496e-05,
"loss": 0.0023,
"step": 446
},
{
"epoch": 4.47,
"learning_rate": 1.4453119501074924e-05,
"loss": 0.0021,
"step": 447
},
{
"epoch": 4.48,
"learning_rate": 1.4428937460242417e-05,
"loss": 0.0025,
"step": 448
},
{
"epoch": 4.49,
"learning_rate": 1.440472315727753e-05,
"loss": 0.003,
"step": 449
},
{
"epoch": 4.5,
"learning_rate": 1.4380476768566825e-05,
"loss": 0.0023,
"step": 450
},
{
"epoch": 4.51,
"learning_rate": 1.4356198470730584e-05,
"loss": 0.0022,
"step": 451
},
{
"epoch": 4.52,
"learning_rate": 1.4331888440621533e-05,
"loss": 0.0023,
"step": 452
},
{
"epoch": 4.53,
"learning_rate": 1.4307546855323549e-05,
"loss": 0.0024,
"step": 453
},
{
"epoch": 4.54,
"learning_rate": 1.4283173892150366e-05,
"loss": 0.0018,
"step": 454
},
{
"epoch": 4.55,
"learning_rate": 1.425876972864429e-05,
"loss": 0.0018,
"step": 455
},
{
"epoch": 4.56,
"learning_rate": 1.4234334542574906e-05,
"loss": 0.0026,
"step": 456
},
{
"epoch": 4.57,
"learning_rate": 1.4209868511937765e-05,
"loss": 0.002,
"step": 457
},
{
"epoch": 4.58,
"learning_rate": 1.4185371814953116e-05,
"loss": 0.0023,
"step": 458
},
{
"epoch": 4.59,
"learning_rate": 1.4160844630064596e-05,
"loss": 0.0028,
"step": 459
},
{
"epoch": 4.6,
"learning_rate": 1.4136287135937915e-05,
"loss": 0.0035,
"step": 460
},
{
"epoch": 4.61,
"learning_rate": 1.4111699511459578e-05,
"loss": 0.0021,
"step": 461
},
{
"epoch": 4.62,
"learning_rate": 1.4087081935735565e-05,
"loss": 0.0026,
"step": 462
},
{
"epoch": 4.63,
"learning_rate": 1.4062434588090033e-05,
"loss": 0.003,
"step": 463
},
{
"epoch": 4.64,
"learning_rate": 1.4037757648064019e-05,
"loss": 0.0029,
"step": 464
},
{
"epoch": 4.65,
"learning_rate": 1.4013051295414108e-05,
"loss": 0.0021,
"step": 465
},
{
"epoch": 4.66,
"learning_rate": 1.3988315710111151e-05,
"loss": 0.004,
"step": 466
},
{
"epoch": 4.67,
"learning_rate": 1.3963551072338932e-05,
"loss": 0.0022,
"step": 467
},
{
"epoch": 4.68,
"learning_rate": 1.3938757562492873e-05,
"loss": 0.0029,
"step": 468
},
{
"epoch": 4.69,
"learning_rate": 1.3913935361178706e-05,
"loss": 0.0023,
"step": 469
},
{
"epoch": 4.7,
"learning_rate": 1.3889084649211157e-05,
"loss": 0.002,
"step": 470
},
{
"epoch": 4.71,
"learning_rate": 1.3864205607612648e-05,
"loss": 0.0024,
"step": 471
},
{
"epoch": 4.72,
"learning_rate": 1.3839298417611964e-05,
"loss": 0.003,
"step": 472
},
{
"epoch": 4.73,
"learning_rate": 1.381436326064292e-05,
"loss": 0.0017,
"step": 473
},
{
"epoch": 4.74,
"learning_rate": 1.378940031834307e-05,
"loss": 0.0023,
"step": 474
},
{
"epoch": 4.75,
"learning_rate": 1.3764409772552354e-05,
"loss": 0.0034,
"step": 475
},
{
"epoch": 4.76,
"learning_rate": 1.3739391805311795e-05,
"loss": 0.002,
"step": 476
},
{
"epoch": 4.77,
"learning_rate": 1.3714346598862168e-05,
"loss": 0.0022,
"step": 477
},
{
"epoch": 4.78,
"learning_rate": 1.3689274335642653e-05,
"loss": 0.0017,
"step": 478
},
{
"epoch": 4.79,
"learning_rate": 1.3664175198289543e-05,
"loss": 0.0025,
"step": 479
},
{
"epoch": 4.8,
"learning_rate": 1.3639049369634878e-05,
"loss": 0.0031,
"step": 480
},
{
"epoch": 4.81,
"learning_rate": 1.3613897032705132e-05,
"loss": 0.0026,
"step": 481
},
{
"epoch": 4.82,
"learning_rate": 1.3588718370719878e-05,
"loss": 0.0027,
"step": 482
},
{
"epoch": 4.83,
"learning_rate": 1.356351356709045e-05,
"loss": 0.0026,
"step": 483
},
{
"epoch": 4.84,
"learning_rate": 1.353828280541861e-05,
"loss": 0.0028,
"step": 484
},
{
"epoch": 4.85,
"learning_rate": 1.35130262694952e-05,
"loss": 0.0028,
"step": 485
},
{
"epoch": 4.86,
"learning_rate": 1.3487744143298822e-05,
"loss": 0.0025,
"step": 486
},
{
"epoch": 4.87,
"learning_rate": 1.3462436610994487e-05,
"loss": 0.0029,
"step": 487
},
{
"epoch": 4.88,
"learning_rate": 1.3437103856932266e-05,
"loss": 0.0022,
"step": 488
},
{
"epoch": 4.89,
"learning_rate": 1.3411746065645961e-05,
"loss": 0.0029,
"step": 489
},
{
"epoch": 4.9,
"learning_rate": 1.3386363421851757e-05,
"loss": 0.0023,
"step": 490
},
{
"epoch": 4.91,
"learning_rate": 1.336095611044687e-05,
"loss": 0.0024,
"step": 491
},
{
"epoch": 4.92,
"learning_rate": 1.3335524316508208e-05,
"loss": 0.0023,
"step": 492
},
{
"epoch": 4.93,
"learning_rate": 1.3310068225291015e-05,
"loss": 0.0019,
"step": 493
},
{
"epoch": 4.94,
"learning_rate": 1.3284588022227529e-05,
"loss": 0.0023,
"step": 494
},
{
"epoch": 4.95,
"learning_rate": 1.3259083892925633e-05,
"loss": 0.0035,
"step": 495
},
{
"epoch": 4.96,
"learning_rate": 1.3233556023167487e-05,
"loss": 0.0031,
"step": 496
},
{
"epoch": 4.97,
"learning_rate": 1.3208004598908197e-05,
"loss": 0.0035,
"step": 497
},
{
"epoch": 4.98,
"learning_rate": 1.3182429806274442e-05,
"loss": 0.0022,
"step": 498
},
{
"epoch": 4.99,
"learning_rate": 1.3156831831563126e-05,
"loss": 0.0025,
"step": 499
},
{
"epoch": 5.0,
"learning_rate": 1.3131210861240027e-05,
"loss": 0.0023,
"step": 500
},
{
"epoch": 5.01,
"learning_rate": 1.3105567081938423e-05,
"loss": 0.0015,
"step": 501
},
{
"epoch": 5.02,
"learning_rate": 1.3079900680457753e-05,
"loss": 0.002,
"step": 502
},
{
"epoch": 5.03,
"learning_rate": 1.3054211843762232e-05,
"loss": 0.0026,
"step": 503
},
{
"epoch": 5.04,
"learning_rate": 1.3028500758979507e-05,
"loss": 0.0014,
"step": 504
},
{
"epoch": 5.05,
"learning_rate": 1.3002767613399297e-05,
"loss": 0.0025,
"step": 505
},
{
"epoch": 5.06,
"learning_rate": 1.2977012594472008e-05,
"loss": 0.0019,
"step": 506
},
{
"epoch": 5.07,
"learning_rate": 1.2951235889807386e-05,
"loss": 0.0022,
"step": 507
},
{
"epoch": 5.08,
"learning_rate": 1.2925437687173144e-05,
"loss": 0.0026,
"step": 508
},
{
"epoch": 5.09,
"learning_rate": 1.2899618174493593e-05,
"loss": 0.0016,
"step": 509
},
{
"epoch": 5.1,
"learning_rate": 1.2873777539848284e-05,
"loss": 0.0016,
"step": 510
},
{
"epoch": 5.11,
"learning_rate": 1.2847915971470612e-05,
"loss": 0.0028,
"step": 511
},
{
"epoch": 5.12,
"learning_rate": 1.2822033657746478e-05,
"loss": 0.0016,
"step": 512
},
{
"epoch": 5.13,
"learning_rate": 1.279613078721289e-05,
"loss": 0.0016,
"step": 513
},
{
"epoch": 5.14,
"learning_rate": 1.2770207548556607e-05,
"loss": 0.0017,
"step": 514
},
{
"epoch": 5.15,
"learning_rate": 1.2744264130612747e-05,
"loss": 0.0016,
"step": 515
},
{
"epoch": 5.16,
"learning_rate": 1.2718300722363431e-05,
"loss": 0.0018,
"step": 516
},
{
"epoch": 5.17,
"learning_rate": 1.2692317512936397e-05,
"loss": 0.002,
"step": 517
},
{
"epoch": 5.18,
"learning_rate": 1.2666314691603615e-05,
"loss": 0.0017,
"step": 518
},
{
"epoch": 5.19,
"learning_rate": 1.2640292447779932e-05,
"loss": 0.0021,
"step": 519
},
{
"epoch": 5.2,
"learning_rate": 1.2614250971021658e-05,
"loss": 0.0019,
"step": 520
},
{
"epoch": 5.21,
"learning_rate": 1.2588190451025209e-05,
"loss": 0.0026,
"step": 521
},
{
"epoch": 5.22,
"learning_rate": 1.2562111077625723e-05,
"loss": 0.0016,
"step": 522
},
{
"epoch": 5.23,
"learning_rate": 1.2536013040795675e-05,
"loss": 0.0017,
"step": 523
},
{
"epoch": 5.24,
"learning_rate": 1.2509896530643488e-05,
"loss": 0.0021,
"step": 524
},
{
"epoch": 5.25,
"learning_rate": 1.248376173741215e-05,
"loss": 0.0014,
"step": 525
},
{
"epoch": 5.26,
"learning_rate": 1.2457608851477833e-05,
"loss": 0.0018,
"step": 526
},
{
"epoch": 5.27,
"learning_rate": 1.2431438063348505e-05,
"loss": 0.0021,
"step": 527
},
{
"epoch": 5.28,
"learning_rate": 1.2405249563662539e-05,
"loss": 0.0016,
"step": 528
},
{
"epoch": 5.29,
"learning_rate": 1.2379043543187322e-05,
"loss": 0.0013,
"step": 529
},
{
"epoch": 5.3,
"learning_rate": 1.2352820192817878e-05,
"loss": 0.0017,
"step": 530
},
{
"epoch": 5.31,
"learning_rate": 1.2326579703575464e-05,
"loss": 0.002,
"step": 531
},
{
"epoch": 5.32,
"learning_rate": 1.2300322266606176e-05,
"loss": 0.002,
"step": 532
},
{
"epoch": 5.33,
"learning_rate": 1.2274048073179585e-05,
"loss": 0.0023,
"step": 533
},
{
"epoch": 5.34,
"learning_rate": 1.2247757314687296e-05,
"loss": 0.0021,
"step": 534
},
{
"epoch": 5.35,
"learning_rate": 1.22214501826416e-05,
"loss": 0.0014,
"step": 535
},
{
"epoch": 5.36,
"learning_rate": 1.2195126868674052e-05,
"loss": 0.0014,
"step": 536
},
{
"epoch": 5.37,
"learning_rate": 1.2168787564534078e-05,
"loss": 0.0014,
"step": 537
},
{
"epoch": 5.38,
"learning_rate": 1.21424324620876e-05,
"loss": 0.002,
"step": 538
},
{
"epoch": 5.39,
"learning_rate": 1.2116061753315598e-05,
"loss": 0.0013,
"step": 539
},
{
"epoch": 5.4,
"learning_rate": 1.2089675630312755e-05,
"loss": 0.0021,
"step": 540
},
{
"epoch": 5.41,
"learning_rate": 1.2063274285286017e-05,
"loss": 0.0027,
"step": 541
},
{
"epoch": 5.42,
"learning_rate": 1.2036857910553234e-05,
"loss": 0.0014,
"step": 542
},
{
"epoch": 5.43,
"learning_rate": 1.2010426698541728e-05,
"loss": 0.0016,
"step": 543
},
{
"epoch": 5.44,
"learning_rate": 1.1983980841786899e-05,
"loss": 0.001,
"step": 544
},
{
"epoch": 5.45,
"learning_rate": 1.1957520532930831e-05,
"loss": 0.0021,
"step": 545
},
{
"epoch": 5.46,
"learning_rate": 1.1931045964720882e-05,
"loss": 0.0017,
"step": 546
},
{
"epoch": 5.47,
"learning_rate": 1.1904557330008273e-05,
"loss": 0.0017,
"step": 547
},
{
"epoch": 5.48,
"learning_rate": 1.1878054821746703e-05,
"loss": 0.0023,
"step": 548
},
{
"epoch": 5.49,
"learning_rate": 1.1851538632990922e-05,
"loss": 0.002,
"step": 549
},
{
"epoch": 5.5,
"learning_rate": 1.182500895689534e-05,
"loss": 0.0018,
"step": 550
},
{
"epoch": 5.51,
"learning_rate": 1.1798465986712612e-05,
"loss": 0.0018,
"step": 551
},
{
"epoch": 5.52,
"learning_rate": 1.177190991579223e-05,
"loss": 0.0019,
"step": 552
},
{
"epoch": 5.53,
"learning_rate": 1.174534093757912e-05,
"loss": 0.0016,
"step": 553
},
{
"epoch": 5.54,
"learning_rate": 1.171875924561223e-05,
"loss": 0.0023,
"step": 554
},
{
"epoch": 5.55,
"learning_rate": 1.1692165033523117e-05,
"loss": 0.0022,
"step": 555
},
{
"epoch": 5.56,
"learning_rate": 1.1665558495034546e-05,
"loss": 0.0017,
"step": 556
},
{
"epoch": 5.57,
"learning_rate": 1.1638939823959061e-05,
"loss": 0.0018,
"step": 557
},
{
"epoch": 5.58,
"learning_rate": 1.1612309214197599e-05,
"loss": 0.0015,
"step": 558
},
{
"epoch": 5.59,
"learning_rate": 1.1585666859738052e-05,
"loss": 0.0023,
"step": 559
},
{
"epoch": 5.6,
"learning_rate": 1.1559012954653865e-05,
"loss": 0.0018,
"step": 560
},
{
"epoch": 5.61,
"learning_rate": 1.1532347693102632e-05,
"loss": 0.0018,
"step": 561
},
{
"epoch": 5.62,
"learning_rate": 1.1505671269324662e-05,
"loss": 0.0015,
"step": 562
},
{
"epoch": 5.63,
"learning_rate": 1.147898387764158e-05,
"loss": 0.0016,
"step": 563
},
{
"epoch": 5.64,
"learning_rate": 1.1452285712454905e-05,
"loss": 0.0024,
"step": 564
},
{
"epoch": 5.65,
"learning_rate": 1.1425576968244626e-05,
"loss": 0.0015,
"step": 565
},
{
"epoch": 5.66,
"learning_rate": 1.1398857839567811e-05,
"loss": 0.0016,
"step": 566
},
{
"epoch": 5.67,
"learning_rate": 1.1372128521057155e-05,
"loss": 0.0024,
"step": 567
},
{
"epoch": 5.68,
"learning_rate": 1.1345389207419588e-05,
"loss": 0.0016,
"step": 568
},
{
"epoch": 5.69,
"learning_rate": 1.1318640093434849e-05,
"loss": 0.0017,
"step": 569
},
{
"epoch": 5.7,
"learning_rate": 1.1291881373954066e-05,
"loss": 0.0016,
"step": 570
},
{
"epoch": 5.71,
"learning_rate": 1.1265113243898333e-05,
"loss": 0.0016,
"step": 571
},
{
"epoch": 5.72,
"learning_rate": 1.1238335898257305e-05,
"loss": 0.0018,
"step": 572
},
{
"epoch": 5.73,
"learning_rate": 1.1211549532087749e-05,
"loss": 0.0012,
"step": 573
},
{
"epoch": 5.74,
"learning_rate": 1.118475434051216e-05,
"loss": 0.002,
"step": 574
},
{
"epoch": 5.75,
"learning_rate": 1.115795051871731e-05,
"loss": 0.0018,
"step": 575
},
{
"epoch": 5.76,
"learning_rate": 1.1131138261952845e-05,
"loss": 0.0015,
"step": 576
},
{
"epoch": 5.77,
"learning_rate": 1.1104317765529839e-05,
"loss": 0.0018,
"step": 577
},
{
"epoch": 5.78,
"learning_rate": 1.1077489224819402e-05,
"loss": 0.0019,
"step": 578
},
{
"epoch": 5.79,
"learning_rate": 1.105065283525124e-05,
"loss": 0.0013,
"step": 579
},
{
"epoch": 5.8,
"learning_rate": 1.1023808792312226e-05,
"loss": 0.0016,
"step": 580
},
{
"epoch": 5.81,
"learning_rate": 1.0996957291544992e-05,
"loss": 0.0014,
"step": 581
},
{
"epoch": 5.82,
"learning_rate": 1.0970098528546482e-05,
"loss": 0.0014,
"step": 582
},
{
"epoch": 5.83,
"learning_rate": 1.0943232698966556e-05,
"loss": 0.0022,
"step": 583
},
{
"epoch": 5.84,
"learning_rate": 1.0916359998506549e-05,
"loss": 0.0017,
"step": 584
},
{
"epoch": 5.85,
"learning_rate": 1.088948062291783e-05,
"loss": 0.0013,
"step": 585
},
{
"epoch": 5.86,
"learning_rate": 1.086259476800041e-05,
"loss": 0.0011,
"step": 586
},
{
"epoch": 5.87,
"learning_rate": 1.083570262960149e-05,
"loss": 0.001,
"step": 587
},
{
"epoch": 5.88,
"learning_rate": 1.0808804403614044e-05,
"loss": 0.0014,
"step": 588
},
{
"epoch": 5.89,
"learning_rate": 1.0781900285975388e-05,
"loss": 0.0016,
"step": 589
},
{
"epoch": 5.9,
"learning_rate": 1.075499047266576e-05,
"loss": 0.0014,
"step": 590
},
{
"epoch": 5.91,
"learning_rate": 1.0728075159706881e-05,
"loss": 0.0015,
"step": 591
},
{
"epoch": 5.92,
"learning_rate": 1.070115454316054e-05,
"loss": 0.0016,
"step": 592
},
{
"epoch": 5.93,
"learning_rate": 1.0674228819127159e-05,
"loss": 0.0019,
"step": 593
},
{
"epoch": 5.94,
"learning_rate": 1.0647298183744359e-05,
"loss": 0.0023,
"step": 594
},
{
"epoch": 5.95,
"learning_rate": 1.062036283318554e-05,
"loss": 0.0019,
"step": 595
},
{
"epoch": 5.96,
"learning_rate": 1.0593422963658453e-05,
"loss": 0.0014,
"step": 596
},
{
"epoch": 5.97,
"learning_rate": 1.0566478771403763e-05,
"loss": 0.0016,
"step": 597
},
{
"epoch": 5.98,
"learning_rate": 1.0539530452693625e-05,
"loss": 0.0014,
"step": 598
},
{
"epoch": 5.99,
"learning_rate": 1.0512578203830252e-05,
"loss": 0.0013,
"step": 599
},
{
"epoch": 6.0,
"learning_rate": 1.0485622221144485e-05,
"loss": 0.0013,
"step": 600
},
{
"epoch": 6.01,
"learning_rate": 1.0458662700994362e-05,
"loss": 0.0014,
"step": 601
},
{
"epoch": 6.02,
"learning_rate": 1.04316998397637e-05,
"loss": 0.0015,
"step": 602
},
{
"epoch": 6.03,
"learning_rate": 1.0404733833860639e-05,
"loss": 0.0015,
"step": 603
},
{
"epoch": 6.04,
"learning_rate": 1.0377764879716234e-05,
"loss": 0.0011,
"step": 604
},
{
"epoch": 6.05,
"learning_rate": 1.0350793173783017e-05,
"loss": 0.0012,
"step": 605
},
{
"epoch": 6.06,
"learning_rate": 1.0323818912533561e-05,
"loss": 0.0017,
"step": 606
},
{
"epoch": 6.07,
"learning_rate": 1.0296842292459058e-05,
"loss": 0.0017,
"step": 607
},
{
"epoch": 6.08,
"learning_rate": 1.0269863510067872e-05,
"loss": 0.0011,
"step": 608
},
{
"epoch": 6.09,
"learning_rate": 1.0242882761884132e-05,
"loss": 0.0014,
"step": 609
},
{
"epoch": 6.1,
"learning_rate": 1.021590024444628e-05,
"loss": 0.0014,
"step": 610
},
{
"epoch": 6.11,
"learning_rate": 1.0188916154305646e-05,
"loss": 0.0018,
"step": 611
},
{
"epoch": 6.12,
"learning_rate": 1.0161930688025018e-05,
"loss": 0.0011,
"step": 612
},
{
"epoch": 6.13,
"learning_rate": 1.01349440421772e-05,
"loss": 0.0019,
"step": 613
},
{
"epoch": 6.14,
"learning_rate": 1.0107956413343603e-05,
"loss": 0.0014,
"step": 614
},
{
"epoch": 6.15,
"learning_rate": 1.0080967998112787e-05,
"loss": 0.001,
"step": 615
},
{
"epoch": 6.16,
"learning_rate": 1.0053978993079046e-05,
"loss": 0.0019,
"step": 616
},
{
"epoch": 6.17,
"learning_rate": 1.0026989594840965e-05,
"loss": 0.0018,
"step": 617
},
{
"epoch": 6.18,
"learning_rate": 1e-05,
"loss": 0.0015,
"step": 618
},
{
"epoch": 6.19,
"learning_rate": 9.973010405159037e-06,
"loss": 0.0011,
"step": 619
},
{
"epoch": 6.2,
"learning_rate": 9.946021006920959e-06,
"loss": 0.0013,
"step": 620
},
{
"epoch": 6.21,
"learning_rate": 9.919032001887215e-06,
"loss": 0.0016,
"step": 621
},
{
"epoch": 6.22,
"learning_rate": 9.892043586656402e-06,
"loss": 0.0012,
"step": 622
},
{
"epoch": 6.23,
"learning_rate": 9.865055957822802e-06,
"loss": 0.0012,
"step": 623
},
{
"epoch": 6.24,
"learning_rate": 9.838069311974986e-06,
"loss": 0.0017,
"step": 624
},
{
"epoch": 6.25,
"learning_rate": 9.811083845694358e-06,
"loss": 0.002,
"step": 625
},
{
"epoch": 6.26,
"learning_rate": 9.784099755553723e-06,
"loss": 0.0012,
"step": 626
},
{
"epoch": 6.27,
"learning_rate": 9.757117238115871e-06,
"loss": 0.0014,
"step": 627
},
{
"epoch": 6.28,
"learning_rate": 9.730136489932133e-06,
"loss": 0.0012,
"step": 628
},
{
"epoch": 6.29,
"learning_rate": 9.703157707540949e-06,
"loss": 0.0014,
"step": 629
},
{
"epoch": 6.3,
"learning_rate": 9.676181087466444e-06,
"loss": 0.0013,
"step": 630
},
{
"epoch": 6.31,
"learning_rate": 9.649206826216988e-06,
"loss": 0.0012,
"step": 631
},
{
"epoch": 6.32,
"learning_rate": 9.622235120283769e-06,
"loss": 0.0018,
"step": 632
},
{
"epoch": 6.33,
"learning_rate": 9.595266166139366e-06,
"loss": 0.0012,
"step": 633
},
{
"epoch": 6.34,
"learning_rate": 9.568300160236305e-06,
"loss": 0.0016,
"step": 634
},
{
"epoch": 6.35,
"learning_rate": 9.54133729900564e-06,
"loss": 0.0017,
"step": 635
},
{
"epoch": 6.36,
"learning_rate": 9.514377778855521e-06,
"loss": 0.002,
"step": 636
},
{
"epoch": 6.37,
"learning_rate": 9.487421796169751e-06,
"loss": 0.0016,
"step": 637
},
{
"epoch": 6.38,
"learning_rate": 9.460469547306375e-06,
"loss": 0.0016,
"step": 638
},
{
"epoch": 6.39,
"learning_rate": 9.433521228596237e-06,
"loss": 0.0016,
"step": 639
},
{
"epoch": 6.4,
"learning_rate": 9.406577036341548e-06,
"loss": 0.0014,
"step": 640
},
{
"epoch": 6.41,
"learning_rate": 9.37963716681446e-06,
"loss": 0.0014,
"step": 641
},
{
"epoch": 6.42,
"learning_rate": 9.352701816255643e-06,
"loss": 0.0012,
"step": 642
},
{
"epoch": 6.43,
"learning_rate": 9.325771180872843e-06,
"loss": 0.0009,
"step": 643
},
{
"epoch": 6.44,
"learning_rate": 9.298845456839459e-06,
"loss": 0.0015,
"step": 644
},
{
"epoch": 6.45,
"learning_rate": 9.27192484029312e-06,
"loss": 0.001,
"step": 645
},
{
"epoch": 6.46,
"learning_rate": 9.245009527334243e-06,
"loss": 0.0013,
"step": 646
},
{
"epoch": 6.47,
"learning_rate": 9.218099714024613e-06,
"loss": 0.0013,
"step": 647
},
{
"epoch": 6.48,
"learning_rate": 9.19119559638596e-06,
"loss": 0.0011,
"step": 648
},
{
"epoch": 6.49,
"learning_rate": 9.164297370398512e-06,
"loss": 0.0011,
"step": 649
},
{
"epoch": 6.5,
"learning_rate": 9.137405231999594e-06,
"loss": 0.0013,
"step": 650
},
{
"epoch": 6.51,
"learning_rate": 9.110519377082174e-06,
"loss": 0.0011,
"step": 651
},
{
"epoch": 6.52,
"learning_rate": 9.083640001493455e-06,
"loss": 0.0018,
"step": 652
},
{
"epoch": 6.53,
"learning_rate": 9.056767301033445e-06,
"loss": 0.0014,
"step": 653
},
{
"epoch": 6.54,
"learning_rate": 9.02990147145352e-06,
"loss": 0.0017,
"step": 654
},
{
"epoch": 6.55,
"learning_rate": 9.003042708455011e-06,
"loss": 0.0014,
"step": 655
},
{
"epoch": 6.56,
"learning_rate": 8.976191207687775e-06,
"loss": 0.0007,
"step": 656
},
{
"epoch": 6.57,
"learning_rate": 8.949347164748761e-06,
"loss": 0.0015,
"step": 657
},
{
"epoch": 6.58,
"learning_rate": 8.9225107751806e-06,
"loss": 0.0011,
"step": 658
},
{
"epoch": 6.59,
"learning_rate": 8.895682234470163e-06,
"loss": 0.002,
"step": 659
},
{
"epoch": 6.6,
"learning_rate": 8.868861738047158e-06,
"loss": 0.0016,
"step": 660
},
{
"epoch": 6.61,
"learning_rate": 8.842049481282691e-06,
"loss": 0.0011,
"step": 661
},
{
"epoch": 6.62,
"learning_rate": 8.815245659487841e-06,
"loss": 0.001,
"step": 662
},
{
"epoch": 6.63,
"learning_rate": 8.788450467912254e-06,
"loss": 0.0013,
"step": 663
},
{
"epoch": 6.64,
"learning_rate": 8.7616641017427e-06,
"loss": 0.0015,
"step": 664
},
{
"epoch": 6.65,
"learning_rate": 8.73488675610167e-06,
"loss": 0.0012,
"step": 665
},
{
"epoch": 6.66,
"learning_rate": 8.708118626045939e-06,
"loss": 0.0017,
"step": 666
},
{
"epoch": 6.67,
"learning_rate": 8.681359906565154e-06,
"loss": 0.0013,
"step": 667
},
{
"epoch": 6.68,
"learning_rate": 8.654610792580415e-06,
"loss": 0.0013,
"step": 668
},
{
"epoch": 6.69,
"learning_rate": 8.62787147894285e-06,
"loss": 0.0017,
"step": 669
},
{
"epoch": 6.7,
"learning_rate": 8.601142160432194e-06,
"loss": 0.0008,
"step": 670
},
{
"epoch": 6.71,
"learning_rate": 8.574423031755377e-06,
"loss": 0.0013,
"step": 671
},
{
"epoch": 6.72,
"learning_rate": 8.5477142875451e-06,
"loss": 0.0015,
"step": 672
},
{
"epoch": 6.73,
"learning_rate": 8.521016122358421e-06,
"loss": 0.0014,
"step": 673
},
{
"epoch": 6.74,
"learning_rate": 8.494328730675338e-06,
"loss": 0.001,
"step": 674
},
{
"epoch": 6.75,
"learning_rate": 8.46765230689737e-06,
"loss": 0.0014,
"step": 675
},
{
"epoch": 6.76,
"learning_rate": 8.440987045346135e-06,
"loss": 0.0016,
"step": 676
},
{
"epoch": 6.77,
"learning_rate": 8.41433314026195e-06,
"loss": 0.0015,
"step": 677
},
{
"epoch": 6.78,
"learning_rate": 8.387690785802403e-06,
"loss": 0.0015,
"step": 678
},
{
"epoch": 6.79,
"learning_rate": 8.361060176040939e-06,
"loss": 0.0014,
"step": 679
},
{
"epoch": 6.8,
"learning_rate": 8.334441504965456e-06,
"loss": 0.0012,
"step": 680
},
{
"epoch": 6.81,
"learning_rate": 8.307834966476885e-06,
"loss": 0.0013,
"step": 681
},
{
"epoch": 6.82,
"learning_rate": 8.281240754387772e-06,
"loss": 0.0012,
"step": 682
},
{
"epoch": 6.83,
"learning_rate": 8.254659062420884e-06,
"loss": 0.0011,
"step": 683
},
{
"epoch": 6.84,
"learning_rate": 8.228090084207773e-06,
"loss": 0.0021,
"step": 684
},
{
"epoch": 6.85,
"learning_rate": 8.201534013287391e-06,
"loss": 0.0012,
"step": 685
},
{
"epoch": 6.86,
"learning_rate": 8.174991043104662e-06,
"loss": 0.001,
"step": 686
},
{
"epoch": 6.87,
"learning_rate": 8.148461367009081e-06,
"loss": 0.0012,
"step": 687
},
{
"epoch": 6.88,
"learning_rate": 8.1219451782533e-06,
"loss": 0.0015,
"step": 688
},
{
"epoch": 6.89,
"learning_rate": 8.09544266999173e-06,
"loss": 0.0016,
"step": 689
},
{
"epoch": 6.9,
"learning_rate": 8.068954035279121e-06,
"loss": 0.0012,
"step": 690
},
{
"epoch": 6.91,
"learning_rate": 8.04247946706917e-06,
"loss": 0.0009,
"step": 691
},
{
"epoch": 6.92,
"learning_rate": 8.016019158213103e-06,
"loss": 0.0015,
"step": 692
},
{
"epoch": 6.93,
"learning_rate": 7.989573301458274e-06,
"loss": 0.0015,
"step": 693
},
{
"epoch": 6.94,
"learning_rate": 7.963142089446769e-06,
"loss": 0.0017,
"step": 694
},
{
"epoch": 6.95,
"learning_rate": 7.936725714713985e-06,
"loss": 0.0015,
"step": 695
},
{
"epoch": 6.96,
"learning_rate": 7.91032436968725e-06,
"loss": 0.0015,
"step": 696
},
{
"epoch": 6.97,
"learning_rate": 7.883938246684405e-06,
"loss": 0.0012,
"step": 697
},
{
"epoch": 6.98,
"learning_rate": 7.857567537912404e-06,
"loss": 0.0012,
"step": 698
},
{
"epoch": 6.99,
"learning_rate": 7.831212435465925e-06,
"loss": 0.0024,
"step": 699
},
{
"epoch": 7.0,
"learning_rate": 7.804873131325955e-06,
"loss": 0.0011,
"step": 700
},
{
"epoch": 7.01,
"learning_rate": 7.778549817358404e-06,
"loss": 0.0011,
"step": 701
},
{
"epoch": 7.02,
"learning_rate": 7.752242685312709e-06,
"loss": 0.0016,
"step": 702
},
{
"epoch": 7.03,
"learning_rate": 7.725951926820421e-06,
"loss": 0.0011,
"step": 703
},
{
"epoch": 7.04,
"learning_rate": 7.699677733393827e-06,
"loss": 0.0011,
"step": 704
},
{
"epoch": 7.05,
"learning_rate": 7.673420296424541e-06,
"loss": 0.0008,
"step": 705
},
{
"epoch": 7.06,
"learning_rate": 7.647179807182125e-06,
"loss": 0.0011,
"step": 706
},
{
"epoch": 7.07,
"learning_rate": 7.620956456812682e-06,
"loss": 0.001,
"step": 707
},
{
"epoch": 7.08,
"learning_rate": 7.594750436337467e-06,
"loss": 0.001,
"step": 708
},
{
"epoch": 7.09,
"learning_rate": 7.568561936651496e-06,
"loss": 0.0008,
"step": 709
},
{
"epoch": 7.1,
"learning_rate": 7.5423911485221675e-06,
"loss": 0.0012,
"step": 710
},
{
"epoch": 7.11,
"learning_rate": 7.516238262587851e-06,
"loss": 0.0007,
"step": 711
},
{
"epoch": 7.12,
"learning_rate": 7.490103469356513e-06,
"loss": 0.0012,
"step": 712
},
{
"epoch": 7.13,
"learning_rate": 7.463986959204324e-06,
"loss": 0.0009,
"step": 713
},
{
"epoch": 7.14,
"learning_rate": 7.4378889223742766e-06,
"loss": 0.0008,
"step": 714
},
{
"epoch": 7.15,
"learning_rate": 7.411809548974792e-06,
"loss": 0.0012,
"step": 715
},
{
"epoch": 7.16,
"learning_rate": 7.385749028978347e-06,
"loss": 0.0009,
"step": 716
},
{
"epoch": 7.17,
"learning_rate": 7.35970755222007e-06,
"loss": 0.0016,
"step": 717
},
{
"epoch": 7.18,
"learning_rate": 7.333685308396383e-06,
"loss": 0.001,
"step": 718
},
{
"epoch": 7.19,
"learning_rate": 7.307682487063608e-06,
"loss": 0.0011,
"step": 719
},
{
"epoch": 7.2,
"learning_rate": 7.2816992776365714e-06,
"loss": 0.0007,
"step": 720
},
{
"epoch": 7.21,
"learning_rate": 7.255735869387257e-06,
"loss": 0.0012,
"step": 721
},
{
"epoch": 7.22,
"learning_rate": 7.2297924514433985e-06,
"loss": 0.0008,
"step": 722
},
{
"epoch": 7.23,
"learning_rate": 7.203869212787112e-06,
"loss": 0.0015,
"step": 723
},
{
"epoch": 7.24,
"learning_rate": 7.1779663422535235e-06,
"loss": 0.0012,
"step": 724
},
{
"epoch": 7.25,
"learning_rate": 7.152084028529389e-06,
"loss": 0.0008,
"step": 725
},
{
"epoch": 7.26,
"learning_rate": 7.126222460151719e-06,
"loss": 0.0011,
"step": 726
},
{
"epoch": 7.27,
"learning_rate": 7.100381825506408e-06,
"loss": 0.0008,
"step": 727
},
{
"epoch": 7.28,
"learning_rate": 7.0745623128268605e-06,
"loss": 0.0007,
"step": 728
},
{
"epoch": 7.29,
"learning_rate": 7.048764110192618e-06,
"loss": 0.0013,
"step": 729
},
{
"epoch": 7.3,
"learning_rate": 7.022987405527997e-06,
"loss": 0.0009,
"step": 730
},
{
"epoch": 7.31,
"learning_rate": 6.997232386600706e-06,
"loss": 0.001,
"step": 731
},
{
"epoch": 7.32,
"learning_rate": 6.971499241020495e-06,
"loss": 0.0009,
"step": 732
},
{
"epoch": 7.33,
"learning_rate": 6.945788156237772e-06,
"loss": 0.0008,
"step": 733
},
{
"epoch": 7.34,
"learning_rate": 6.920099319542249e-06,
"loss": 0.0011,
"step": 734
},
{
"epoch": 7.35,
"learning_rate": 6.894432918061579e-06,
"loss": 0.0011,
"step": 735
},
{
"epoch": 7.36,
"learning_rate": 6.868789138759977e-06,
"loss": 0.001,
"step": 736
},
{
"epoch": 7.37,
"learning_rate": 6.843168168436879e-06,
"loss": 0.0009,
"step": 737
},
{
"epoch": 7.38,
"learning_rate": 6.8175701937255645e-06,
"loss": 0.0011,
"step": 738
},
{
"epoch": 7.39,
"learning_rate": 6.7919954010918075e-06,
"loss": 0.0009,
"step": 739
},
{
"epoch": 7.4,
"learning_rate": 6.766443976832518e-06,
"loss": 0.0013,
"step": 740
},
{
"epoch": 7.41,
"learning_rate": 6.740916107074372e-06,
"loss": 0.0014,
"step": 741
},
{
"epoch": 7.42,
"learning_rate": 6.7154119777724736e-06,
"loss": 0.001,
"step": 742
},
{
"epoch": 7.43,
"learning_rate": 6.689931774708991e-06,
"loss": 0.0009,
"step": 743
},
{
"epoch": 7.44,
"learning_rate": 6.664475683491797e-06,
"loss": 0.0012,
"step": 744
},
{
"epoch": 7.45,
"learning_rate": 6.639043889553134e-06,
"loss": 0.001,
"step": 745
},
{
"epoch": 7.46,
"learning_rate": 6.613636578148242e-06,
"loss": 0.0009,
"step": 746
},
{
"epoch": 7.47,
"learning_rate": 6.588253934354039e-06,
"loss": 0.0009,
"step": 747
},
{
"epoch": 7.48,
"learning_rate": 6.562896143067734e-06,
"loss": 0.001,
"step": 748
},
{
"epoch": 7.49,
"learning_rate": 6.5375633890055124e-06,
"loss": 0.001,
"step": 749
},
{
"epoch": 7.5,
"learning_rate": 6.5122558567011775e-06,
"loss": 0.0011,
"step": 750
},
{
"epoch": 7.51,
"learning_rate": 6.4869737305047996e-06,
"loss": 0.0011,
"step": 751
},
{
"epoch": 7.52,
"learning_rate": 6.461717194581394e-06,
"loss": 0.0013,
"step": 752
},
{
"epoch": 7.53,
"learning_rate": 6.43648643290955e-06,
"loss": 0.0009,
"step": 753
},
{
"epoch": 7.54,
"learning_rate": 6.411281629280122e-06,
"loss": 0.0008,
"step": 754
},
{
"epoch": 7.55,
"learning_rate": 6.386102967294872e-06,
"loss": 0.0012,
"step": 755
},
{
"epoch": 7.56,
"learning_rate": 6.360950630365126e-06,
"loss": 0.0011,
"step": 756
},
{
"epoch": 7.57,
"learning_rate": 6.335824801710462e-06,
"loss": 0.001,
"step": 757
},
{
"epoch": 7.58,
"learning_rate": 6.310725664357349e-06,
"loss": 0.0008,
"step": 758
},
{
"epoch": 7.59,
"learning_rate": 6.2856534011378365e-06,
"loss": 0.0009,
"step": 759
},
{
"epoch": 7.6,
"learning_rate": 6.260608194688207e-06,
"loss": 0.0009,
"step": 760
},
{
"epoch": 7.61,
"learning_rate": 6.23559022744765e-06,
"loss": 0.0011,
"step": 761
},
{
"epoch": 7.62,
"learning_rate": 6.210599681656933e-06,
"loss": 0.0012,
"step": 762
},
{
"epoch": 7.63,
"learning_rate": 6.185636739357083e-06,
"loss": 0.001,
"step": 763
},
{
"epoch": 7.64,
"learning_rate": 6.160701582388039e-06,
"loss": 0.001,
"step": 764
},
{
"epoch": 7.65,
"learning_rate": 6.135794392387353e-06,
"loss": 0.0007,
"step": 765
},
{
"epoch": 7.66,
"learning_rate": 6.110915350788846e-06,
"loss": 0.001,
"step": 766
},
{
"epoch": 7.67,
"learning_rate": 6.086064638821298e-06,
"loss": 0.0011,
"step": 767
},
{
"epoch": 7.68,
"learning_rate": 6.061242437507131e-06,
"loss": 0.0008,
"step": 768
},
{
"epoch": 7.69,
"learning_rate": 6.036448927661069e-06,
"loss": 0.0008,
"step": 769
},
{
"epoch": 7.7,
"learning_rate": 6.011684289888851e-06,
"loss": 0.0008,
"step": 770
},
{
"epoch": 7.71,
"learning_rate": 5.986948704585895e-06,
"loss": 0.0011,
"step": 771
},
{
"epoch": 7.72,
"learning_rate": 5.962242351935985e-06,
"loss": 0.001,
"step": 772
},
{
"epoch": 7.73,
"learning_rate": 5.9375654119099714e-06,
"loss": 0.0013,
"step": 773
},
{
"epoch": 7.74,
"learning_rate": 5.912918064264441e-06,
"loss": 0.0008,
"step": 774
},
{
"epoch": 7.75,
"learning_rate": 5.888300488540426e-06,
"loss": 0.001,
"step": 775
},
{
"epoch": 7.76,
"learning_rate": 5.863712864062089e-06,
"loss": 0.0012,
"step": 776
},
{
"epoch": 7.77,
"learning_rate": 5.839155369935407e-06,
"loss": 0.001,
"step": 777
},
{
"epoch": 7.78,
"learning_rate": 5.814628185046884e-06,
"loss": 0.0011,
"step": 778
},
{
"epoch": 7.79,
"learning_rate": 5.790131488062238e-06,
"loss": 0.0011,
"step": 779
},
{
"epoch": 7.8,
"learning_rate": 5.765665457425102e-06,
"loss": 0.0006,
"step": 780
},
{
"epoch": 7.81,
"learning_rate": 5.741230271355714e-06,
"loss": 0.001,
"step": 781
},
{
"epoch": 7.82,
"learning_rate": 5.716826107849633e-06,
"loss": 0.0009,
"step": 782
},
{
"epoch": 7.83,
"learning_rate": 5.692453144676451e-06,
"loss": 0.0009,
"step": 783
},
{
"epoch": 7.84,
"learning_rate": 5.6681115593784705e-06,
"loss": 0.0007,
"step": 784
},
{
"epoch": 7.85,
"learning_rate": 5.643801529269419e-06,
"loss": 0.0013,
"step": 785
},
{
"epoch": 7.86,
"learning_rate": 5.619523231433177e-06,
"loss": 0.0009,
"step": 786
},
{
"epoch": 7.87,
"learning_rate": 5.595276842722469e-06,
"loss": 0.0008,
"step": 787
},
{
"epoch": 7.88,
"learning_rate": 5.571062539757582e-06,
"loss": 0.0013,
"step": 788
},
{
"epoch": 7.89,
"learning_rate": 5.546880498925079e-06,
"loss": 0.0006,
"step": 789
},
{
"epoch": 7.9,
"learning_rate": 5.522730896376506e-06,
"loss": 0.0012,
"step": 790
},
{
"epoch": 7.91,
"learning_rate": 5.498613908027121e-06,
"loss": 0.001,
"step": 791
},
{
"epoch": 7.92,
"learning_rate": 5.4745297095546125e-06,
"loss": 0.0007,
"step": 792
},
{
"epoch": 7.93,
"learning_rate": 5.450478476397802e-06,
"loss": 0.001,
"step": 793
},
{
"epoch": 7.94,
"learning_rate": 5.4264603837553954e-06,
"loss": 0.0009,
"step": 794
},
{
"epoch": 7.95,
"learning_rate": 5.40247560658467e-06,
"loss": 0.0015,
"step": 795
},
{
"epoch": 7.96,
"learning_rate": 5.378524319600231e-06,
"loss": 0.0011,
"step": 796
},
{
"epoch": 7.97,
"learning_rate": 5.354606697272733e-06,
"loss": 0.001,
"step": 797
},
{
"epoch": 7.98,
"learning_rate": 5.330722913827594e-06,
"loss": 0.0011,
"step": 798
},
{
"epoch": 7.99,
"learning_rate": 5.30687314324374e-06,
"loss": 0.0013,
"step": 799
},
{
"epoch": 8.0,
"learning_rate": 5.2830575592523415e-06,
"loss": 0.0012,
"step": 800
},
{
"epoch": 8.01,
"learning_rate": 5.259276335335522e-06,
"loss": 0.0008,
"step": 801
},
{
"epoch": 8.02,
"learning_rate": 5.235529644725126e-06,
"loss": 0.0008,
"step": 802
},
{
"epoch": 8.03,
"learning_rate": 5.211817660401444e-06,
"loss": 0.0011,
"step": 803
},
{
"epoch": 8.04,
"learning_rate": 5.18814055509195e-06,
"loss": 0.0006,
"step": 804
},
{
"epoch": 8.05,
"learning_rate": 5.164498501270046e-06,
"loss": 0.0008,
"step": 805
},
{
"epoch": 8.06,
"learning_rate": 5.140891671153797e-06,
"loss": 0.0004,
"step": 806
},
{
"epoch": 8.07,
"learning_rate": 5.117320236704697e-06,
"loss": 0.0007,
"step": 807
},
{
"epoch": 8.08,
"learning_rate": 5.093784369626397e-06,
"loss": 0.0009,
"step": 808
},
{
"epoch": 8.09,
"learning_rate": 5.070284241363462e-06,
"loss": 0.0005,
"step": 809
},
{
"epoch": 8.1,
"learning_rate": 5.046820023100129e-06,
"loss": 0.0011,
"step": 810
},
{
"epoch": 8.11,
"learning_rate": 5.023391885759034e-06,
"loss": 0.0008,
"step": 811
},
{
"epoch": 8.12,
"learning_rate": 5.000000000000003e-06,
"loss": 0.0005,
"step": 812
},
{
"epoch": 8.13,
"learning_rate": 4.976644536218783e-06,
"loss": 0.0006,
"step": 813
},
{
"epoch": 8.14,
"learning_rate": 4.953325664545812e-06,
"loss": 0.0008,
"step": 814
},
{
"epoch": 8.15,
"learning_rate": 4.930043554844975e-06,
"loss": 0.0005,
"step": 815
},
{
"epoch": 8.16,
"learning_rate": 4.9067983767123736e-06,
"loss": 0.0008,
"step": 816
},
{
"epoch": 8.17,
"learning_rate": 4.883590299475071e-06,
"loss": 0.0011,
"step": 817
},
{
"epoch": 8.18,
"learning_rate": 4.860419492189886e-06,
"loss": 0.0008,
"step": 818
},
{
"epoch": 8.19,
"learning_rate": 4.837286123642141e-06,
"loss": 0.0011,
"step": 819
},
{
"epoch": 8.2,
"learning_rate": 4.814190362344454e-06,
"loss": 0.0008,
"step": 820
},
{
"epoch": 8.21,
"learning_rate": 4.791132376535476e-06,
"loss": 0.0008,
"step": 821
},
{
"epoch": 8.22,
"learning_rate": 4.7681123341787e-06,
"loss": 0.0006,
"step": 822
},
{
"epoch": 8.23,
"learning_rate": 4.745130402961218e-06,
"loss": 0.0005,
"step": 823
},
{
"epoch": 8.24,
"learning_rate": 4.722186750292511e-06,
"loss": 0.0008,
"step": 824
},
{
"epoch": 8.25,
"learning_rate": 4.699281543303222e-06,
"loss": 0.001,
"step": 825
},
{
"epoch": 8.26,
"learning_rate": 4.676414948843934e-06,
"loss": 0.0007,
"step": 826
},
{
"epoch": 8.27,
"learning_rate": 4.653587133483968e-06,
"loss": 0.0007,
"step": 827
},
{
"epoch": 8.28,
"learning_rate": 4.630798263510162e-06,
"loss": 0.0009,
"step": 828
},
{
"epoch": 8.29,
"learning_rate": 4.608048504925658e-06,
"loss": 0.0007,
"step": 829
},
{
"epoch": 8.3,
"learning_rate": 4.5853380234487025e-06,
"loss": 0.0009,
"step": 830
},
{
"epoch": 8.31,
"learning_rate": 4.562666984511416e-06,
"loss": 0.0009,
"step": 831
},
{
"epoch": 8.32,
"learning_rate": 4.54003555325862e-06,
"loss": 0.0007,
"step": 832
},
{
"epoch": 8.33,
"learning_rate": 4.517443894546609e-06,
"loss": 0.0008,
"step": 833
},
{
"epoch": 8.34,
"learning_rate": 4.494892172941965e-06,
"loss": 0.0009,
"step": 834
},
{
"epoch": 8.35,
"learning_rate": 4.472380552720349e-06,
"loss": 0.0004,
"step": 835
},
{
"epoch": 8.36,
"learning_rate": 4.449909197865303e-06,
"loss": 0.0007,
"step": 836
},
{
"epoch": 8.37,
"learning_rate": 4.427478272067066e-06,
"loss": 0.0009,
"step": 837
},
{
"epoch": 8.38,
"learning_rate": 4.405087938721376e-06,
"loss": 0.0009,
"step": 838
},
{
"epoch": 8.39,
"learning_rate": 4.382738360928277e-06,
"loss": 0.0012,
"step": 839
},
{
"epoch": 8.4,
"learning_rate": 4.360429701490935e-06,
"loss": 0.0012,
"step": 840
},
{
"epoch": 8.41,
"learning_rate": 4.338162122914452e-06,
"loss": 0.0009,
"step": 841
},
{
"epoch": 8.42,
"learning_rate": 4.3159357874046725e-06,
"loss": 0.001,
"step": 842
},
{
"epoch": 8.43,
"learning_rate": 4.2937508568670194e-06,
"loss": 0.0006,
"step": 843
},
{
"epoch": 8.44,
"learning_rate": 4.271607492905303e-06,
"loss": 0.0009,
"step": 844
},
{
"epoch": 8.45,
"learning_rate": 4.249505856820545e-06,
"loss": 0.0007,
"step": 845
},
{
"epoch": 8.46,
"learning_rate": 4.2274461096098085e-06,
"loss": 0.0008,
"step": 846
},
{
"epoch": 8.47,
"learning_rate": 4.205428411965011e-06,
"loss": 0.001,
"step": 847
},
{
"epoch": 8.48,
"learning_rate": 4.183452924271776e-06,
"loss": 0.0008,
"step": 848
},
{
"epoch": 8.49,
"learning_rate": 4.1615198066082475e-06,
"loss": 0.0007,
"step": 849
},
{
"epoch": 8.5,
"learning_rate": 4.139629218743931e-06,
"loss": 0.0008,
"step": 850
},
{
"epoch": 8.51,
"learning_rate": 4.117781320138532e-06,
"loss": 0.0005,
"step": 851
},
{
"epoch": 8.52,
"learning_rate": 4.095976269940777e-06,
"loss": 0.0009,
"step": 852
},
{
"epoch": 8.53,
"learning_rate": 4.074214226987281e-06,
"loss": 0.0006,
"step": 853
},
{
"epoch": 8.54,
"learning_rate": 4.052495349801375e-06,
"loss": 0.001,
"step": 854
},
{
"epoch": 8.55,
"learning_rate": 4.03081979659195e-06,
"loss": 0.0011,
"step": 855
},
{
"epoch": 8.56,
"learning_rate": 4.009187725252309e-06,
"loss": 0.0006,
"step": 856
},
{
"epoch": 8.57,
"learning_rate": 3.987599293359018e-06,
"loss": 0.0007,
"step": 857
},
{
"epoch": 8.58,
"learning_rate": 3.966054658170754e-06,
"loss": 0.0008,
"step": 858
},
{
"epoch": 8.59,
"learning_rate": 3.944553976627161e-06,
"loss": 0.0012,
"step": 859
},
{
"epoch": 8.6,
"learning_rate": 3.923097405347709e-06,
"loss": 0.0009,
"step": 860
},
{
"epoch": 8.61,
"learning_rate": 3.901685100630554e-06,
"loss": 0.0007,
"step": 861
},
{
"epoch": 8.62,
"learning_rate": 3.8803172184513884e-06,
"loss": 0.0009,
"step": 862
},
{
"epoch": 8.63,
"learning_rate": 3.858993914462318e-06,
"loss": 0.0008,
"step": 863
},
{
"epoch": 8.64,
"learning_rate": 3.837715343990727e-06,
"loss": 0.0008,
"step": 864
},
{
"epoch": 8.65,
"learning_rate": 3.816481662038137e-06,
"loss": 0.0008,
"step": 865
},
{
"epoch": 8.66,
"learning_rate": 3.795293023279093e-06,
"loss": 0.0006,
"step": 866
},
{
"epoch": 8.67,
"learning_rate": 3.7741495820600128e-06,
"loss": 0.0006,
"step": 867
},
{
"epoch": 8.68,
"learning_rate": 3.753051492398089e-06,
"loss": 0.0006,
"step": 868
},
{
"epoch": 8.69,
"learning_rate": 3.731998907980151e-06,
"loss": 0.0006,
"step": 869
},
{
"epoch": 8.7,
"learning_rate": 3.7109919821615546e-06,
"loss": 0.0008,
"step": 870
},
{
"epoch": 8.71,
"learning_rate": 3.6900308679650578e-06,
"loss": 0.0008,
"step": 871
},
{
"epoch": 8.72,
"learning_rate": 3.669115718079702e-06,
"loss": 0.0009,
"step": 872
},
{
"epoch": 8.73,
"learning_rate": 3.6482466848597164e-06,
"loss": 0.001,
"step": 873
},
{
"epoch": 8.74,
"learning_rate": 3.627423920323392e-06,
"loss": 0.0009,
"step": 874
},
{
"epoch": 8.75,
"learning_rate": 3.6066475761519837e-06,
"loss": 0.0008,
"step": 875
},
{
"epoch": 8.76,
"learning_rate": 3.585917803688603e-06,
"loss": 0.0005,
"step": 876
},
{
"epoch": 8.77,
"learning_rate": 3.565234753937108e-06,
"loss": 0.0005,
"step": 877
},
{
"epoch": 8.78,
"learning_rate": 3.544598577561016e-06,
"loss": 0.0005,
"step": 878
},
{
"epoch": 8.79,
"learning_rate": 3.5240094248824e-06,
"loss": 0.0009,
"step": 879
},
{
"epoch": 8.8,
"learning_rate": 3.5034674458807893e-06,
"loss": 0.0005,
"step": 880
},
{
"epoch": 8.81,
"learning_rate": 3.4829727901920886e-06,
"loss": 0.0009,
"step": 881
},
{
"epoch": 8.82,
"learning_rate": 3.4625256071074776e-06,
"loss": 0.0007,
"step": 882
},
{
"epoch": 8.83,
"learning_rate": 3.4421260455723202e-06,
"loss": 0.0008,
"step": 883
},
{
"epoch": 8.84,
"learning_rate": 3.421774254185096e-06,
"loss": 0.0008,
"step": 884
},
{
"epoch": 8.85,
"learning_rate": 3.4014703811963024e-06,
"loss": 0.0008,
"step": 885
},
{
"epoch": 8.86,
"learning_rate": 3.3812145745073834e-06,
"loss": 0.0008,
"step": 886
},
{
"epoch": 8.87,
"learning_rate": 3.3610069816696476e-06,
"loss": 0.0008,
"step": 887
},
{
"epoch": 8.88,
"learning_rate": 3.3408477498831917e-06,
"loss": 0.0006,
"step": 888
},
{
"epoch": 8.89,
"learning_rate": 3.320737025995835e-06,
"loss": 0.0007,
"step": 889
},
{
"epoch": 8.9,
"learning_rate": 3.300674956502047e-06,
"loss": 0.001,
"step": 890
},
{
"epoch": 8.91,
"learning_rate": 3.280661687541876e-06,
"loss": 0.0006,
"step": 891
},
{
"epoch": 8.92,
"learning_rate": 3.2606973648998918e-06,
"loss": 0.001,
"step": 892
},
{
"epoch": 8.93,
"learning_rate": 3.2407821340041155e-06,
"loss": 0.0008,
"step": 893
},
{
"epoch": 8.94,
"learning_rate": 3.2209161399249677e-06,
"loss": 0.0009,
"step": 894
},
{
"epoch": 8.95,
"learning_rate": 3.2010995273742075e-06,
"loss": 0.0007,
"step": 895
},
{
"epoch": 8.96,
"learning_rate": 3.1813324407038826e-06,
"loss": 0.0011,
"step": 896
},
{
"epoch": 8.97,
"learning_rate": 3.1616150239052647e-06,
"loss": 0.0008,
"step": 897
},
{
"epoch": 8.98,
"learning_rate": 3.1419474206078203e-06,
"loss": 0.0008,
"step": 898
},
{
"epoch": 8.99,
"learning_rate": 3.1223297740781523e-06,
"loss": 0.0007,
"step": 899
},
{
"epoch": 9.0,
"learning_rate": 3.1027622272189572e-06,
"loss": 0.0012,
"step": 900
},
{
"epoch": 9.01,
"learning_rate": 3.0832449225679873e-06,
"loss": 0.0004,
"step": 901
},
{
"epoch": 9.02,
"learning_rate": 3.063778002297013e-06,
"loss": 0.0004,
"step": 902
},
{
"epoch": 9.03,
"learning_rate": 3.0443616082107753e-06,
"loss": 0.0005,
"step": 903
},
{
"epoch": 9.04,
"learning_rate": 3.024995881745972e-06,
"loss": 0.0005,
"step": 904
},
{
"epoch": 9.05,
"learning_rate": 3.005680963970217e-06,
"loss": 0.0006,
"step": 905
},
{
"epoch": 9.06,
"learning_rate": 2.9864169955810085e-06,
"loss": 0.0006,
"step": 906
},
{
"epoch": 9.07,
"learning_rate": 2.9672041169047174e-06,
"loss": 0.0005,
"step": 907
},
{
"epoch": 9.08,
"learning_rate": 2.948042467895544e-06,
"loss": 0.0005,
"step": 908
},
{
"epoch": 9.09,
"learning_rate": 2.9289321881345257e-06,
"loss": 0.0009,
"step": 909
},
{
"epoch": 9.1,
"learning_rate": 2.909873416828497e-06,
"loss": 0.0007,
"step": 910
},
{
"epoch": 9.11,
"learning_rate": 2.890866292809087e-06,
"loss": 0.0005,
"step": 911
},
{
"epoch": 9.12,
"learning_rate": 2.8719109545317102e-06,
"loss": 0.0005,
"step": 912
},
{
"epoch": 9.13,
"learning_rate": 2.8530075400745405e-06,
"loss": 0.0005,
"step": 913
},
{
"epoch": 9.14,
"learning_rate": 2.8341561871375314e-06,
"loss": 0.0006,
"step": 914
},
{
"epoch": 9.15,
"learning_rate": 2.8153570330413925e-06,
"loss": 0.0006,
"step": 915
},
{
"epoch": 9.16,
"learning_rate": 2.7966102147265993e-06,
"loss": 0.0005,
"step": 916
},
{
"epoch": 9.17,
"learning_rate": 2.7779158687523966e-06,
"loss": 0.0008,
"step": 917
},
{
"epoch": 9.18,
"learning_rate": 2.759274131295787e-06,
"loss": 0.0009,
"step": 918
},
{
"epoch": 9.19,
"learning_rate": 2.740685138150564e-06,
"loss": 0.0005,
"step": 919
},
{
"epoch": 9.2,
"learning_rate": 2.722149024726307e-06,
"loss": 0.0009,
"step": 920
},
{
"epoch": 9.21,
"learning_rate": 2.7036659260473973e-06,
"loss": 0.0005,
"step": 921
},
{
"epoch": 9.22,
"learning_rate": 2.685235976752039e-06,
"loss": 0.0009,
"step": 922
},
{
"epoch": 9.23,
"learning_rate": 2.6668593110912734e-06,
"loss": 0.0009,
"step": 923
},
{
"epoch": 9.24,
"learning_rate": 2.648536062927999e-06,
"loss": 0.0008,
"step": 924
},
{
"epoch": 9.25,
"learning_rate": 2.6302663657360038e-06,
"loss": 0.0011,
"step": 925
},
{
"epoch": 9.26,
"learning_rate": 2.6120503525989894e-06,
"loss": 0.0008,
"step": 926
},
{
"epoch": 9.27,
"learning_rate": 2.593888156209603e-06,
"loss": 0.0007,
"step": 927
},
{
"epoch": 9.28,
"learning_rate": 2.5757799088684654e-06,
"loss": 0.0006,
"step": 928
},
{
"epoch": 9.29,
"learning_rate": 2.5577257424832146e-06,
"loss": 0.0008,
"step": 929
},
{
"epoch": 9.3,
"learning_rate": 2.5397257885675396e-06,
"loss": 0.0005,
"step": 930
},
{
"epoch": 9.31,
"learning_rate": 2.521780178240224e-06,
"loss": 0.0005,
"step": 931
},
{
"epoch": 9.32,
"learning_rate": 2.5038890422241958e-06,
"loss": 0.0006,
"step": 932
},
{
"epoch": 9.33,
"learning_rate": 2.48605251084556e-06,
"loss": 0.0012,
"step": 933
},
{
"epoch": 9.34,
"learning_rate": 2.4682707140326713e-06,
"loss": 0.0007,
"step": 934
},
{
"epoch": 9.35,
"learning_rate": 2.45054378131517e-06,
"loss": 0.0008,
"step": 935
},
{
"epoch": 9.36,
"learning_rate": 2.432871841823047e-06,
"loss": 0.0008,
"step": 936
},
{
"epoch": 9.37,
"learning_rate": 2.415255024285702e-06,
"loss": 0.0009,
"step": 937
},
{
"epoch": 9.38,
"learning_rate": 2.3976934570309974e-06,
"loss": 0.0006,
"step": 938
},
{
"epoch": 9.39,
"learning_rate": 2.3801872679843384e-06,
"loss": 0.0007,
"step": 939
},
{
"epoch": 9.4,
"learning_rate": 2.362736584667731e-06,
"loss": 0.0006,
"step": 940
},
{
"epoch": 9.41,
"learning_rate": 2.345341534198855e-06,
"loss": 0.0005,
"step": 941
},
{
"epoch": 9.42,
"learning_rate": 2.328002243290138e-06,
"loss": 0.0007,
"step": 942
},
{
"epoch": 9.43,
"learning_rate": 2.3107188382478386e-06,
"loss": 0.0006,
"step": 943
},
{
"epoch": 9.44,
"learning_rate": 2.293491444971109e-06,
"loss": 0.0006,
"step": 944
},
{
"epoch": 9.45,
"learning_rate": 2.2763201889510987e-06,
"loss": 0.0003,
"step": 945
},
{
"epoch": 9.46,
"learning_rate": 2.25920519527003e-06,
"loss": 0.0005,
"step": 946
},
{
"epoch": 9.47,
"learning_rate": 2.2421465886002856e-06,
"loss": 0.0011,
"step": 947
},
{
"epoch": 9.48,
"learning_rate": 2.2251444932035094e-06,
"loss": 0.0008,
"step": 948
},
{
"epoch": 9.49,
"learning_rate": 2.208199032929681e-06,
"loss": 0.0005,
"step": 949
},
{
"epoch": 9.5,
"learning_rate": 2.19131033121624e-06,
"loss": 0.0007,
"step": 950
},
{
"epoch": 9.51,
"learning_rate": 2.1744785110871713e-06,
"loss": 0.0009,
"step": 951
},
{
"epoch": 9.52,
"learning_rate": 2.157703695152109e-06,
"loss": 0.0006,
"step": 952
},
{
"epoch": 9.53,
"learning_rate": 2.1409860056054522e-06,
"loss": 0.0006,
"step": 953
},
{
"epoch": 9.54,
"learning_rate": 2.124325564225458e-06,
"loss": 0.0007,
"step": 954
},
{
"epoch": 9.55,
"learning_rate": 2.107722492373375e-06,
"loss": 0.0005,
"step": 955
},
{
"epoch": 9.56,
"learning_rate": 2.091176910992545e-06,
"loss": 0.0005,
"step": 956
},
{
"epoch": 9.57,
"learning_rate": 2.074688940607529e-06,
"loss": 0.0007,
"step": 957
},
{
"epoch": 9.58,
"learning_rate": 2.0582587013232268e-06,
"loss": 0.0005,
"step": 958
},
{
"epoch": 9.59,
"learning_rate": 2.0418863128239964e-06,
"loss": 0.0005,
"step": 959
},
{
"epoch": 9.6,
"learning_rate": 2.025571894372794e-06,
"loss": 0.0007,
"step": 960
},
{
"epoch": 9.61,
"learning_rate": 2.009315564810297e-06,
"loss": 0.0006,
"step": 961
},
{
"epoch": 9.62,
"learning_rate": 1.993117442554039e-06,
"loss": 0.0006,
"step": 962
},
{
"epoch": 9.63,
"learning_rate": 1.976977645597552e-06,
"loss": 0.0005,
"step": 963
},
{
"epoch": 9.64,
"learning_rate": 1.9608962915095e-06,
"loss": 0.0007,
"step": 964
},
{
"epoch": 9.65,
"learning_rate": 1.944873497432829e-06,
"loss": 0.0005,
"step": 965
},
{
"epoch": 9.66,
"learning_rate": 1.9289093800839067e-06,
"loss": 0.0006,
"step": 966
},
{
"epoch": 9.67,
"learning_rate": 1.913004055751679e-06,
"loss": 0.0005,
"step": 967
},
{
"epoch": 9.68,
"learning_rate": 1.897157640296825e-06,
"loss": 0.0008,
"step": 968
},
{
"epoch": 9.69,
"learning_rate": 1.8813702491508956e-06,
"loss": 0.0006,
"step": 969
},
{
"epoch": 9.7,
"learning_rate": 1.865641997315496e-06,
"loss": 0.0006,
"step": 970
},
{
"epoch": 9.71,
"learning_rate": 1.8499729993614345e-06,
"loss": 0.0008,
"step": 971
},
{
"epoch": 9.72,
"learning_rate": 1.8343633694278895e-06,
"loss": 0.0006,
"step": 972
},
{
"epoch": 9.73,
"learning_rate": 1.8188132212215837e-06,
"loss": 0.0007,
"step": 973
},
{
"epoch": 9.74,
"learning_rate": 1.803322668015941e-06,
"loss": 0.0007,
"step": 974
},
{
"epoch": 9.75,
"learning_rate": 1.7878918226502816e-06,
"loss": 0.0009,
"step": 975
},
{
"epoch": 9.76,
"learning_rate": 1.7725207975289883e-06,
"loss": 0.0006,
"step": 976
},
{
"epoch": 9.77,
"learning_rate": 1.757209704620686e-06,
"loss": 0.0008,
"step": 977
},
{
"epoch": 9.78,
"learning_rate": 1.7419586554574364e-06,
"loss": 0.0007,
"step": 978
},
{
"epoch": 9.79,
"learning_rate": 1.7267677611339085e-06,
"loss": 0.0006,
"step": 979
},
{
"epoch": 9.8,
"learning_rate": 1.7116371323065883e-06,
"loss": 0.001,
"step": 980
},
{
"epoch": 9.81,
"learning_rate": 1.69656687919296e-06,
"loss": 0.0009,
"step": 981
},
{
"epoch": 9.82,
"learning_rate": 1.6815571115707108e-06,
"loss": 0.0006,
"step": 982
},
{
"epoch": 9.83,
"learning_rate": 1.666607938776924e-06,
"loss": 0.0007,
"step": 983
},
{
"epoch": 9.84,
"learning_rate": 1.6517194697072903e-06,
"loss": 0.0006,
"step": 984
},
{
"epoch": 9.85,
"learning_rate": 1.6368918128153021e-06,
"loss": 0.0007,
"step": 985
},
{
"epoch": 9.86,
"learning_rate": 1.6221250761114803e-06,
"loss": 0.0006,
"step": 986
},
{
"epoch": 9.87,
"learning_rate": 1.607419367162577e-06,
"loss": 0.0004,
"step": 987
},
{
"epoch": 9.88,
"learning_rate": 1.5927747930907921e-06,
"loss": 0.0007,
"step": 988
},
{
"epoch": 9.89,
"learning_rate": 1.5781914605729997e-06,
"loss": 0.0007,
"step": 989
},
{
"epoch": 9.9,
"learning_rate": 1.5636694758399563e-06,
"loss": 0.0005,
"step": 990
},
{
"epoch": 9.91,
"learning_rate": 1.5492089446755454e-06,
"loss": 0.0003,
"step": 991
},
{
"epoch": 9.92,
"learning_rate": 1.534809972415998e-06,
"loss": 0.0006,
"step": 992
},
{
"epoch": 9.93,
"learning_rate": 1.520472663949122e-06,
"loss": 0.0007,
"step": 993
},
{
"epoch": 9.94,
"learning_rate": 1.5061971237135453e-06,
"loss": 0.0006,
"step": 994
},
{
"epoch": 9.95,
"learning_rate": 1.4919834556979474e-06,
"loss": 0.0009,
"step": 995
},
{
"epoch": 9.96,
"learning_rate": 1.4778317634403082e-06,
"loss": 0.0006,
"step": 996
},
{
"epoch": 9.97,
"learning_rate": 1.4637421500271553e-06,
"loss": 0.0011,
"step": 997
},
{
"epoch": 9.98,
"learning_rate": 1.449714718092803e-06,
"loss": 0.0004,
"step": 998
},
{
"epoch": 9.99,
"learning_rate": 1.4357495698186186e-06,
"loss": 0.0004,
"step": 999
},
{
"epoch": 10.0,
"learning_rate": 1.4218468069322576e-06,
"loss": 0.0005,
"step": 1000
},
{
"epoch": 10.01,
"learning_rate": 1.4080065307069524e-06,
"loss": 0.0005,
"step": 1001
},
{
"epoch": 10.02,
"learning_rate": 1.3942288419607476e-06,
"loss": 0.0006,
"step": 1002
},
{
"epoch": 10.03,
"learning_rate": 1.3805138410557783e-06,
"loss": 0.0005,
"step": 1003
},
{
"epoch": 10.04,
"learning_rate": 1.3668616278975343e-06,
"loss": 0.0003,
"step": 1004
},
{
"epoch": 10.05,
"learning_rate": 1.3532723019341376e-06,
"loss": 0.0005,
"step": 1005
},
{
"epoch": 10.06,
"learning_rate": 1.339745962155613e-06,
"loss": 0.0008,
"step": 1006
},
{
"epoch": 10.07,
"learning_rate": 1.3262827070931717e-06,
"loss": 0.0004,
"step": 1007
},
{
"epoch": 10.08,
"learning_rate": 1.3128826348184886e-06,
"loss": 0.0009,
"step": 1008
},
{
"epoch": 10.09,
"learning_rate": 1.299545842942992e-06,
"loss": 0.0007,
"step": 1009
},
{
"epoch": 10.1,
"learning_rate": 1.286272428617147e-06,
"loss": 0.0003,
"step": 1010
},
{
"epoch": 10.11,
"learning_rate": 1.2730624885297537e-06,
"loss": 0.0006,
"step": 1011
},
{
"epoch": 10.12,
"learning_rate": 1.2599161189072428e-06,
"loss": 0.0004,
"step": 1012
},
{
"epoch": 10.13,
"learning_rate": 1.2468334155129702e-06,
"loss": 0.0005,
"step": 1013
},
{
"epoch": 10.14,
"learning_rate": 1.233814473646524e-06,
"loss": 0.0005,
"step": 1014
},
{
"epoch": 10.15,
"learning_rate": 1.2208593881430242e-06,
"loss": 0.0005,
"step": 1015
},
{
"epoch": 10.16,
"learning_rate": 1.207968253372438e-06,
"loss": 0.0005,
"step": 1016
},
{
"epoch": 10.17,
"learning_rate": 1.195141163238892e-06,
"loss": 0.0004,
"step": 1017
},
{
"epoch": 10.18,
"learning_rate": 1.1823782111799843e-06,
"loss": 0.0009,
"step": 1018
},
{
"epoch": 10.19,
"learning_rate": 1.169679490166108e-06,
"loss": 0.0005,
"step": 1019
},
{
"epoch": 10.2,
"learning_rate": 1.1570450926997657e-06,
"loss": 0.0004,
"step": 1020
},
{
"epoch": 10.21,
"learning_rate": 1.1444751108149077e-06,
"loss": 0.0005,
"step": 1021
},
{
"epoch": 10.22,
"learning_rate": 1.1319696360762566e-06,
"loss": 0.0006,
"step": 1022
},
{
"epoch": 10.23,
"learning_rate": 1.1195287595786352e-06,
"loss": 0.0007,
"step": 1023
},
{
"epoch": 10.24,
"learning_rate": 1.1071525719463094e-06,
"loss": 0.0007,
"step": 1024
},
{
"epoch": 10.25,
"learning_rate": 1.0948411633323285e-06,
"loss": 0.0008,
"step": 1025
},
{
"epoch": 10.26,
"learning_rate": 1.0825946234178575e-06,
"loss": 0.0007,
"step": 1026
},
{
"epoch": 10.27,
"learning_rate": 1.0704130414115387e-06,
"loss": 0.0005,
"step": 1027
},
{
"epoch": 10.28,
"learning_rate": 1.058296506048836e-06,
"loss": 0.0003,
"step": 1028
},
{
"epoch": 10.29,
"learning_rate": 1.0462451055913847e-06,
"loss": 0.0005,
"step": 1029
},
{
"epoch": 10.3,
"learning_rate": 1.034258927826356e-06,
"loss": 0.0007,
"step": 1030
},
{
"epoch": 10.31,
"learning_rate": 1.022338060065804e-06,
"loss": 0.0008,
"step": 1031
},
{
"epoch": 10.32,
"learning_rate": 1.010482589146048e-06,
"loss": 0.0007,
"step": 1032
},
{
"epoch": 10.33,
"learning_rate": 9.98692601427028e-07,
"loss": 0.0007,
"step": 1033
},
{
"epoch": 10.34,
"learning_rate": 9.869681827916777e-07,
"loss": 0.0005,
"step": 1034
},
{
"epoch": 10.35,
"learning_rate": 9.753094186453028e-07,
"loss": 0.0006,
"step": 1035
},
{
"epoch": 10.36,
"learning_rate": 9.637163939149485e-07,
"loss": 0.0004,
"step": 1036
},
{
"epoch": 10.37,
"learning_rate": 9.521891930487925e-07,
"loss": 0.0006,
"step": 1037
},
{
"epoch": 10.38,
"learning_rate": 9.407279000155311e-07,
"loss": 0.0008,
"step": 1038
},
{
"epoch": 10.39,
"learning_rate": 9.293325983037549e-07,
"loss": 0.0006,
"step": 1039
},
{
"epoch": 10.4,
"learning_rate": 9.180033709213454e-07,
"loss": 0.0005,
"step": 1040
},
{
"epoch": 10.41,
"learning_rate": 9.067403003948783e-07,
"loss": 0.0008,
"step": 1041
},
{
"epoch": 10.42,
"learning_rate": 8.955434687690157e-07,
"loss": 0.0009,
"step": 1042
},
{
"epoch": 10.43,
"learning_rate": 8.844129576059069e-07,
"loss": 0.0004,
"step": 1043
},
{
"epoch": 10.44,
"learning_rate": 8.733488479845997e-07,
"loss": 0.0006,
"step": 1044
},
{
"epoch": 10.45,
"learning_rate": 8.623512205004425e-07,
"loss": 0.0008,
"step": 1045
},
{
"epoch": 10.46,
"learning_rate": 8.514201552645052e-07,
"loss": 0.0005,
"step": 1046
},
{
"epoch": 10.47,
"learning_rate": 8.405557319029911e-07,
"loss": 0.0007,
"step": 1047
},
{
"epoch": 10.48,
"learning_rate": 8.297580295566576e-07,
"loss": 0.0008,
"step": 1048
},
{
"epoch": 10.49,
"learning_rate": 8.190271268802397e-07,
"loss": 0.0003,
"step": 1049
},
{
"epoch": 10.5,
"learning_rate": 8.083631020418792e-07,
"loss": 0.0003,
"step": 1050
},
{
"epoch": 10.51,
"learning_rate": 7.977660327225467e-07,
"loss": 0.0008,
"step": 1051
},
{
"epoch": 10.52,
"learning_rate": 7.872359961154907e-07,
"loss": 0.0006,
"step": 1052
},
{
"epoch": 10.53,
"learning_rate": 7.767730689256614e-07,
"loss": 0.0009,
"step": 1053
},
{
"epoch": 10.54,
"learning_rate": 7.663773273691599e-07,
"loss": 0.0007,
"step": 1054
},
{
"epoch": 10.55,
"learning_rate": 7.560488471726824e-07,
"loss": 0.0007,
"step": 1055
},
{
"epoch": 10.56,
"learning_rate": 7.457877035729588e-07,
"loss": 0.0007,
"step": 1056
},
{
"epoch": 10.57,
"learning_rate": 7.355939713162219e-07,
"loss": 0.0006,
"step": 1057
},
{
"epoch": 10.58,
"learning_rate": 7.25467724657647e-07,
"loss": 0.001,
"step": 1058
},
{
"epoch": 10.59,
"learning_rate": 7.154090373608236e-07,
"loss": 0.0005,
"step": 1059
},
{
"epoch": 10.6,
"learning_rate": 7.054179826972074e-07,
"loss": 0.0007,
"step": 1060
},
{
"epoch": 10.61,
"learning_rate": 6.954946334455914e-07,
"loss": 0.0004,
"step": 1061
},
{
"epoch": 10.62,
"learning_rate": 6.856390618915775e-07,
"loss": 0.0006,
"step": 1062
},
{
"epoch": 10.63,
"learning_rate": 6.758513398270483e-07,
"loss": 0.0004,
"step": 1063
},
{
"epoch": 10.64,
"learning_rate": 6.661315385496426e-07,
"loss": 0.0008,
"step": 1064
},
{
"epoch": 10.65,
"learning_rate": 6.564797288622371e-07,
"loss": 0.0009,
"step": 1065
},
{
"epoch": 10.66,
"learning_rate": 6.468959810724329e-07,
"loss": 0.0004,
"step": 1066
},
{
"epoch": 10.67,
"learning_rate": 6.373803649920385e-07,
"loss": 0.0006,
"step": 1067
},
{
"epoch": 10.68,
"learning_rate": 6.279329499365649e-07,
"loss": 0.0005,
"step": 1068
},
{
"epoch": 10.69,
"learning_rate": 6.185538047247208e-07,
"loss": 0.0005,
"step": 1069
},
{
"epoch": 10.7,
"learning_rate": 6.092429976779113e-07,
"loss": 0.0007,
"step": 1070
},
{
"epoch": 10.71,
"learning_rate": 6.000005966197387e-07,
"loss": 0.0007,
"step": 1071
},
{
"epoch": 10.72,
"learning_rate": 5.908266688755049e-07,
"loss": 0.0007,
"step": 1072
},
{
"epoch": 10.73,
"learning_rate": 5.817212812717276e-07,
"loss": 0.0005,
"step": 1073
},
{
"epoch": 10.74,
"learning_rate": 5.726845001356573e-07,
"loss": 0.0005,
"step": 1074
},
{
"epoch": 10.75,
"learning_rate": 5.637163912947808e-07,
"loss": 0.0006,
"step": 1075
},
{
"epoch": 10.76,
"learning_rate": 5.54817020076347e-07,
"loss": 0.0005,
"step": 1076
},
{
"epoch": 10.77,
"learning_rate": 5.459864513068991e-07,
"loss": 0.0006,
"step": 1077
},
{
"epoch": 10.78,
"learning_rate": 5.372247493117921e-07,
"loss": 0.0007,
"step": 1078
},
{
"epoch": 10.79,
"learning_rate": 5.28531977914728e-07,
"loss": 0.0006,
"step": 1079
},
{
"epoch": 10.8,
"learning_rate": 5.199082004372958e-07,
"loss": 0.0004,
"step": 1080
},
{
"epoch": 10.81,
"learning_rate": 5.113534796984976e-07,
"loss": 0.0006,
"step": 1081
},
{
"epoch": 10.82,
"learning_rate": 5.028678780143059e-07,
"loss": 0.0004,
"step": 1082
},
{
"epoch": 10.83,
"learning_rate": 4.944514571971981e-07,
"loss": 0.0005,
"step": 1083
},
{
"epoch": 10.84,
"learning_rate": 4.861042785557147e-07,
"loss": 0.0007,
"step": 1084
},
{
"epoch": 10.85,
"learning_rate": 4.778264028940061e-07,
"loss": 0.0008,
"step": 1085
},
{
"epoch": 10.86,
"learning_rate": 4.696178905113913e-07,
"loss": 0.001,
"step": 1086
},
{
"epoch": 10.87,
"learning_rate": 4.6147880120192336e-07,
"loss": 0.0003,
"step": 1087
},
{
"epoch": 10.88,
"learning_rate": 4.534091942539476e-07,
"loss": 0.0006,
"step": 1088
},
{
"epoch": 10.89,
"learning_rate": 4.454091284496731e-07,
"loss": 0.0006,
"step": 1089
},
{
"epoch": 10.9,
"learning_rate": 4.374786620647442e-07,
"loss": 0.0005,
"step": 1090
},
{
"epoch": 10.91,
"learning_rate": 4.296178528678163e-07,
"loss": 0.0006,
"step": 1091
},
{
"epoch": 10.92,
"learning_rate": 4.218267581201296e-07,
"loss": 0.0005,
"step": 1092
},
{
"epoch": 10.93,
"learning_rate": 4.1410543457510165e-07,
"loss": 0.0006,
"step": 1093
},
{
"epoch": 10.94,
"learning_rate": 4.0645393847790873e-07,
"loss": 0.0004,
"step": 1094
},
{
"epoch": 10.95,
"learning_rate": 3.988723255650728e-07,
"loss": 0.0006,
"step": 1095
},
{
"epoch": 10.96,
"learning_rate": 3.913606510640644e-07,
"loss": 0.0007,
"step": 1096
},
{
"epoch": 10.97,
"learning_rate": 3.8391896969288913e-07,
"loss": 0.0005,
"step": 1097
},
{
"epoch": 10.98,
"learning_rate": 3.7654733565969826e-07,
"loss": 0.0006,
"step": 1098
},
{
"epoch": 10.99,
"learning_rate": 3.6924580266239016e-07,
"loss": 0.0006,
"step": 1099
},
{
"epoch": 11.0,
"learning_rate": 3.620144238882206e-07,
"loss": 0.0005,
"step": 1100
},
{
"epoch": 11.01,
"learning_rate": 3.548532520134129e-07,
"loss": 0.0004,
"step": 1101
},
{
"epoch": 11.02,
"learning_rate": 3.47762339202774e-07,
"loss": 0.0005,
"step": 1102
},
{
"epoch": 11.03,
"learning_rate": 3.4074173710931804e-07,
"loss": 0.0008,
"step": 1103
},
{
"epoch": 11.04,
"learning_rate": 3.3379149687388866e-07,
"loss": 0.0004,
"step": 1104
},
{
"epoch": 11.05,
"learning_rate": 3.2691166912478423e-07,
"loss": 0.0006,
"step": 1105
},
{
"epoch": 11.06,
"learning_rate": 3.2010230397739206e-07,
"loss": 0.0009,
"step": 1106
},
{
"epoch": 11.07,
"learning_rate": 3.133634510338235e-07,
"loss": 0.0004,
"step": 1107
},
{
"epoch": 11.08,
"learning_rate": 3.0669515938254404e-07,
"loss": 0.0008,
"step": 1108
},
{
"epoch": 11.09,
"learning_rate": 3.000974775980314e-07,
"loss": 0.0007,
"step": 1109
},
{
"epoch": 11.1,
"learning_rate": 2.935704537404083e-07,
"loss": 0.0006,
"step": 1110
},
{
"epoch": 11.11,
"learning_rate": 2.8711413535509993e-07,
"loss": 0.0006,
"step": 1111
},
{
"epoch": 11.12,
"learning_rate": 2.807285694724804e-07,
"loss": 0.0003,
"step": 1112
},
{
"epoch": 11.13,
"learning_rate": 2.744138026075405e-07,
"loss": 0.0006,
"step": 1113
},
{
"epoch": 11.14,
"learning_rate": 2.6816988075953787e-07,
"loss": 0.0005,
"step": 1114
},
{
"epoch": 11.15,
"learning_rate": 2.619968494116698e-07,
"loss": 0.0006,
"step": 1115
},
{
"epoch": 11.16,
"learning_rate": 2.5589475353073987e-07,
"loss": 0.0005,
"step": 1116
},
{
"epoch": 11.17,
"learning_rate": 2.498636375668262e-07,
"loss": 0.0008,
"step": 1117
},
{
"epoch": 11.18,
"learning_rate": 2.4390354545296257e-07,
"loss": 0.0007,
"step": 1118
},
{
"epoch": 11.19,
"learning_rate": 2.380145206048201e-07,
"loss": 0.0008,
"step": 1119
},
{
"epoch": 11.2,
"learning_rate": 2.3219660592038285e-07,
"loss": 0.0006,
"step": 1120
},
{
"epoch": 11.21,
"learning_rate": 2.2644984377964584e-07,
"loss": 0.0006,
"step": 1121
},
{
"epoch": 11.22,
"learning_rate": 2.2077427604429435e-07,
"loss": 0.0006,
"step": 1122
},
{
"epoch": 11.23,
"learning_rate": 2.1516994405740953e-07,
"loss": 0.0005,
"step": 1123
},
{
"epoch": 11.24,
"learning_rate": 2.0963688864316324e-07,
"loss": 0.0006,
"step": 1124
},
{
"epoch": 11.25,
"learning_rate": 2.0417515010652032e-07,
"loss": 0.0003,
"step": 1125
},
{
"epoch": 11.26,
"learning_rate": 1.9878476823294467e-07,
"loss": 0.0005,
"step": 1126
},
{
"epoch": 11.27,
"learning_rate": 1.934657822881081e-07,
"loss": 0.0007,
"step": 1127
},
{
"epoch": 11.28,
"learning_rate": 1.8821823101760949e-07,
"loss": 0.0003,
"step": 1128
},
{
"epoch": 11.29,
"learning_rate": 1.8304215264668856e-07,
"loss": 0.0006,
"step": 1129
},
{
"epoch": 11.3,
"learning_rate": 1.7793758487994694e-07,
"loss": 0.0005,
"step": 1130
},
{
"epoch": 11.31,
"learning_rate": 1.7290456490107522e-07,
"loss": 0.0006,
"step": 1131
},
{
"epoch": 11.32,
"learning_rate": 1.6794312937258417e-07,
"loss": 0.0005,
"step": 1132
},
{
"epoch": 11.33,
"learning_rate": 1.630533144355284e-07,
"loss": 0.0008,
"step": 1133
},
{
"epoch": 11.34,
"learning_rate": 1.5823515570925763e-07,
"loss": 0.0007,
"step": 1134
},
{
"epoch": 11.35,
"learning_rate": 1.534886882911446e-07,
"loss": 0.0006,
"step": 1135
},
{
"epoch": 11.36,
"learning_rate": 1.4881394675633543e-07,
"loss": 0.0006,
"step": 1136
},
{
"epoch": 11.37,
"learning_rate": 1.4421096515749855e-07,
"loss": 0.0009,
"step": 1137
},
{
"epoch": 11.38,
"learning_rate": 1.3967977702456946e-07,
"loss": 0.0004,
"step": 1138
},
{
"epoch": 11.39,
"learning_rate": 1.3522041536451646e-07,
"loss": 0.0006,
"step": 1139
},
{
"epoch": 11.4,
"learning_rate": 1.30832912661093e-07,
"loss": 0.0005,
"step": 1140
},
{
"epoch": 11.41,
"learning_rate": 1.2651730087460678e-07,
"loss": 0.0006,
"step": 1141
},
{
"epoch": 11.42,
"learning_rate": 1.2227361144167892e-07,
"loss": 0.0001,
"step": 1142
},
{
"epoch": 11.43,
"learning_rate": 1.1810187527502182e-07,
"loss": 0.0005,
"step": 1143
},
{
"epoch": 11.44,
"learning_rate": 1.1400212276321377e-07,
"loss": 0.0002,
"step": 1144
},
{
"epoch": 11.45,
"learning_rate": 1.0997438377047143e-07,
"loss": 0.0003,
"step": 1145
},
{
"epoch": 11.46,
"learning_rate": 1.0601868763643997e-07,
"loss": 0.0005,
"step": 1146
},
{
"epoch": 11.47,
"learning_rate": 1.0213506317597543e-07,
"loss": 0.0004,
"step": 1147
},
{
"epoch": 11.48,
"learning_rate": 9.832353867893385e-08,
"loss": 0.0005,
"step": 1148
},
{
"epoch": 11.49,
"learning_rate": 9.45841419099669e-08,
"loss": 0.0005,
"step": 1149
},
{
"epoch": 11.5,
"learning_rate": 9.091690010831988e-08,
"loss": 0.0007,
"step": 1150
},
{
"epoch": 11.51,
"learning_rate": 8.732183998763411e-08,
"loss": 0.0008,
"step": 1151
},
{
"epoch": 11.52,
"learning_rate": 8.379898773574924e-08,
"loss": 0.0004,
"step": 1152
},
{
"epoch": 11.53,
"learning_rate": 8.034836901451238e-08,
"loss": 0.0006,
"step": 1153
},
{
"epoch": 11.54,
"learning_rate": 7.697000895959817e-08,
"loss": 0.0006,
"step": 1154
},
{
"epoch": 11.55,
"learning_rate": 7.366393218031564e-08,
"loss": 0.0004,
"step": 1155
},
{
"epoch": 11.56,
"learning_rate": 7.043016275943614e-08,
"loss": 0.0007,
"step": 1156
},
{
"epoch": 11.57,
"learning_rate": 6.726872425301567e-08,
"loss": 0.0005,
"step": 1157
},
{
"epoch": 11.58,
"learning_rate": 6.417963969022389e-08,
"loss": 0.0004,
"step": 1158
},
{
"epoch": 11.59,
"learning_rate": 6.11629315731721e-08,
"loss": 0.0007,
"step": 1159
},
{
"epoch": 11.6,
"learning_rate": 5.821862187675775e-08,
"loss": 0.0007,
"step": 1160
},
{
"epoch": 11.61,
"learning_rate": 5.534673204849572e-08,
"loss": 0.0005,
"step": 1161
},
{
"epoch": 11.62,
"learning_rate": 5.2547283008369534e-08,
"loss": 0.0007,
"step": 1162
},
{
"epoch": 11.63,
"learning_rate": 4.9820295148671484e-08,
"loss": 0.0008,
"step": 1163
},
{
"epoch": 11.64,
"learning_rate": 4.716578833386054e-08,
"loss": 0.0007,
"step": 1164
},
{
"epoch": 11.65,
"learning_rate": 4.458378190041357e-08,
"loss": 0.0004,
"step": 1165
},
{
"epoch": 11.66,
"learning_rate": 4.207429465668877e-08,
"loss": 0.0004,
"step": 1166
},
{
"epoch": 11.67,
"learning_rate": 3.963734488278248e-08,
"loss": 0.0005,
"step": 1167
},
{
"epoch": 11.68,
"learning_rate": 3.727295033040035e-08,
"loss": 0.001,
"step": 1168
},
{
"epoch": 11.69,
"learning_rate": 3.4981128222728586e-08,
"loss": 0.0008,
"step": 1169
},
{
"epoch": 11.7,
"learning_rate": 3.2761895254306285e-08,
"loss": 0.0004,
"step": 1170
},
{
"epoch": 11.71,
"learning_rate": 3.0615267590903276e-08,
"loss": 0.0003,
"step": 1171
},
{
"epoch": 11.72,
"learning_rate": 2.8541260869403565e-08,
"loss": 0.0005,
"step": 1172
},
{
"epoch": 11.73,
"learning_rate": 2.6539890197695428e-08,
"loss": 0.0005,
"step": 1173
},
{
"epoch": 11.74,
"learning_rate": 2.4611170154552612e-08,
"loss": 0.0004,
"step": 1174
},
{
"epoch": 11.75,
"learning_rate": 2.2755114789534406e-08,
"loss": 0.0006,
"step": 1175
},
{
"epoch": 11.76,
"learning_rate": 2.0971737622883515e-08,
"loss": 0.0004,
"step": 1176
},
{
"epoch": 11.77,
"learning_rate": 1.926105164542391e-08,
"loss": 0.0007,
"step": 1177
},
{
"epoch": 11.78,
"learning_rate": 1.7623069318469797e-08,
"loss": 0.0007,
"step": 1178
},
{
"epoch": 11.79,
"learning_rate": 1.605780257373124e-08,
"loss": 0.0006,
"step": 1179
},
{
"epoch": 11.8,
"learning_rate": 1.4565262813230896e-08,
"loss": 0.0003,
"step": 1180
},
{
"epoch": 11.81,
"learning_rate": 1.3145460909218532e-08,
"loss": 0.0007,
"step": 1181
},
{
"epoch": 11.82,
"learning_rate": 1.179840720409331e-08,
"loss": 0.0008,
"step": 1182
},
{
"epoch": 11.83,
"learning_rate": 1.0524111510326062e-08,
"loss": 0.0006,
"step": 1183
},
{
"epoch": 11.84,
"learning_rate": 9.322583110392692e-09,
"loss": 0.0004,
"step": 1184
},
{
"epoch": 11.85,
"learning_rate": 8.193830756699773e-09,
"loss": 0.0008,
"step": 1185
},
{
"epoch": 11.86,
"learning_rate": 7.1378626715268295e-09,
"loss": 0.0006,
"step": 1186
},
{
"epoch": 11.87,
"learning_rate": 6.1546865469630516e-09,
"loss": 0.0005,
"step": 1187
},
{
"epoch": 11.88,
"learning_rate": 5.2443095448506674e-09,
"loss": 0.0008,
"step": 1188
},
{
"epoch": 11.89,
"learning_rate": 4.406738296738322e-09,
"loss": 0.0004,
"step": 1189
},
{
"epoch": 11.9,
"learning_rate": 3.64197890382445e-09,
"loss": 0.0008,
"step": 1190
},
{
"epoch": 11.91,
"learning_rate": 2.9500369369195313e-09,
"loss": 0.0007,
"step": 1191
},
{
"epoch": 11.92,
"learning_rate": 2.330917436402791e-09,
"loss": 0.0004,
"step": 1192
},
{
"epoch": 11.93,
"learning_rate": 1.7846249121855619e-09,
"loss": 0.0003,
"step": 1193
},
{
"epoch": 11.94,
"learning_rate": 1.3111633436779792e-09,
"loss": 0.0007,
"step": 1194
},
{
"epoch": 11.95,
"learning_rate": 9.105361797623335e-10,
"loss": 0.0005,
"step": 1195
},
{
"epoch": 11.96,
"learning_rate": 5.827463387653165e-10,
"loss": 0.0009,
"step": 1196
},
{
"epoch": 11.97,
"learning_rate": 3.277962084369257e-10,
"loss": 0.0007,
"step": 1197
},
{
"epoch": 11.98,
"learning_rate": 1.4568764593603235e-10,
"loss": 0.0004,
"step": 1198
},
{
"epoch": 11.99,
"learning_rate": 3.6421977811507135e-11,
"loss": 0.0003,
"step": 1199
},
{
"epoch": 12.0,
"learning_rate": 0.0,
"loss": 0.0004,
"step": 1200
},
{
"epoch": 12.0,
"step": 1200,
"total_flos": 8.425804598626746e+18,
"train_loss": 0.02877667422716816,
"train_runtime": 17282.8975,
"train_samples_per_second": 8.836,
"train_steps_per_second": 0.069
}
],
"max_steps": 1200,
"num_train_epochs": 12,
"total_flos": 8.425804598626746e+18,
"trial_name": null,
"trial_params": null
}