GATEAU-1k-100k / trainer_state.json
ssz1111's picture
upload
43dd06a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9890601690701144,
"eval_steps": 500,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 3.278688524590164e-07,
"loss": 1.1631,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 6.557377049180328e-07,
"loss": 1.8625,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 9.836065573770493e-07,
"loss": 1.0719,
"step": 3
},
{
"epoch": 0.0,
"learning_rate": 1.3114754098360657e-06,
"loss": 0.9863,
"step": 4
},
{
"epoch": 0.0,
"learning_rate": 1.6393442622950819e-06,
"loss": 1.2453,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 1.9672131147540985e-06,
"loss": 1.0458,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 2.295081967213115e-06,
"loss": 1.1643,
"step": 7
},
{
"epoch": 0.01,
"learning_rate": 2.6229508196721314e-06,
"loss": 0.9423,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 2.9508196721311478e-06,
"loss": 0.9232,
"step": 9
},
{
"epoch": 0.01,
"learning_rate": 3.2786885245901638e-06,
"loss": 1.1195,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 3.6065573770491806e-06,
"loss": 0.829,
"step": 11
},
{
"epoch": 0.01,
"learning_rate": 3.934426229508197e-06,
"loss": 1.1953,
"step": 12
},
{
"epoch": 0.01,
"learning_rate": 4.2622950819672135e-06,
"loss": 1.8621,
"step": 13
},
{
"epoch": 0.01,
"learning_rate": 4.59016393442623e-06,
"loss": 0.9418,
"step": 14
},
{
"epoch": 0.01,
"learning_rate": 4.918032786885246e-06,
"loss": 0.8859,
"step": 15
},
{
"epoch": 0.02,
"learning_rate": 5.245901639344263e-06,
"loss": 1.1402,
"step": 16
},
{
"epoch": 0.02,
"learning_rate": 5.573770491803278e-06,
"loss": 0.9034,
"step": 17
},
{
"epoch": 0.02,
"learning_rate": 5.9016393442622956e-06,
"loss": 1.4775,
"step": 18
},
{
"epoch": 0.02,
"learning_rate": 6.229508196721312e-06,
"loss": 1.4377,
"step": 19
},
{
"epoch": 0.02,
"learning_rate": 6.5573770491803276e-06,
"loss": 0.8942,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 6.885245901639345e-06,
"loss": 0.9499,
"step": 21
},
{
"epoch": 0.02,
"learning_rate": 7.213114754098361e-06,
"loss": 0.8495,
"step": 22
},
{
"epoch": 0.02,
"learning_rate": 7.540983606557377e-06,
"loss": 1.5032,
"step": 23
},
{
"epoch": 0.02,
"learning_rate": 7.868852459016394e-06,
"loss": 1.8468,
"step": 24
},
{
"epoch": 0.02,
"learning_rate": 8.19672131147541e-06,
"loss": 1.7923,
"step": 25
},
{
"epoch": 0.03,
"learning_rate": 8.524590163934427e-06,
"loss": 1.0237,
"step": 26
},
{
"epoch": 0.03,
"learning_rate": 8.852459016393443e-06,
"loss": 0.7109,
"step": 27
},
{
"epoch": 0.03,
"learning_rate": 9.18032786885246e-06,
"loss": 0.9663,
"step": 28
},
{
"epoch": 0.03,
"learning_rate": 9.508196721311476e-06,
"loss": 1.0116,
"step": 29
},
{
"epoch": 0.03,
"learning_rate": 9.836065573770493e-06,
"loss": 1.3598,
"step": 30
},
{
"epoch": 0.03,
"learning_rate": 1.0163934426229509e-05,
"loss": 1.0879,
"step": 31
},
{
"epoch": 0.03,
"learning_rate": 1.0491803278688525e-05,
"loss": 1.0015,
"step": 32
},
{
"epoch": 0.03,
"learning_rate": 1.0819672131147544e-05,
"loss": 0.9813,
"step": 33
},
{
"epoch": 0.03,
"learning_rate": 1.1147540983606557e-05,
"loss": 1.0592,
"step": 34
},
{
"epoch": 0.03,
"learning_rate": 1.1475409836065575e-05,
"loss": 1.0062,
"step": 35
},
{
"epoch": 0.04,
"learning_rate": 1.1803278688524591e-05,
"loss": 0.8501,
"step": 36
},
{
"epoch": 0.04,
"learning_rate": 1.2131147540983608e-05,
"loss": 0.8943,
"step": 37
},
{
"epoch": 0.04,
"learning_rate": 1.2459016393442624e-05,
"loss": 1.4784,
"step": 38
},
{
"epoch": 0.04,
"learning_rate": 1.2786885245901642e-05,
"loss": 1.2013,
"step": 39
},
{
"epoch": 0.04,
"learning_rate": 1.3114754098360655e-05,
"loss": 1.0282,
"step": 40
},
{
"epoch": 0.04,
"learning_rate": 1.3442622950819673e-05,
"loss": 0.9437,
"step": 41
},
{
"epoch": 0.04,
"learning_rate": 1.377049180327869e-05,
"loss": 1.6735,
"step": 42
},
{
"epoch": 0.04,
"learning_rate": 1.4098360655737706e-05,
"loss": 0.7945,
"step": 43
},
{
"epoch": 0.04,
"learning_rate": 1.4426229508196722e-05,
"loss": 1.3146,
"step": 44
},
{
"epoch": 0.04,
"learning_rate": 1.4754098360655739e-05,
"loss": 1.0985,
"step": 45
},
{
"epoch": 0.05,
"learning_rate": 1.5081967213114754e-05,
"loss": 0.6569,
"step": 46
},
{
"epoch": 0.05,
"learning_rate": 1.5409836065573772e-05,
"loss": 0.8684,
"step": 47
},
{
"epoch": 0.05,
"learning_rate": 1.5737704918032788e-05,
"loss": 1.2233,
"step": 48
},
{
"epoch": 0.05,
"learning_rate": 1.6065573770491805e-05,
"loss": 0.7584,
"step": 49
},
{
"epoch": 0.05,
"learning_rate": 1.639344262295082e-05,
"loss": 0.8788,
"step": 50
},
{
"epoch": 0.05,
"learning_rate": 1.6721311475409837e-05,
"loss": 0.8581,
"step": 51
},
{
"epoch": 0.05,
"learning_rate": 1.7049180327868854e-05,
"loss": 0.925,
"step": 52
},
{
"epoch": 0.05,
"learning_rate": 1.737704918032787e-05,
"loss": 0.704,
"step": 53
},
{
"epoch": 0.05,
"learning_rate": 1.7704918032786887e-05,
"loss": 0.9357,
"step": 54
},
{
"epoch": 0.05,
"learning_rate": 1.8032786885245903e-05,
"loss": 1.0651,
"step": 55
},
{
"epoch": 0.06,
"learning_rate": 1.836065573770492e-05,
"loss": 0.8416,
"step": 56
},
{
"epoch": 0.06,
"learning_rate": 1.8688524590163936e-05,
"loss": 0.801,
"step": 57
},
{
"epoch": 0.06,
"learning_rate": 1.9016393442622952e-05,
"loss": 0.9371,
"step": 58
},
{
"epoch": 0.06,
"learning_rate": 1.934426229508197e-05,
"loss": 0.8922,
"step": 59
},
{
"epoch": 0.06,
"learning_rate": 1.9672131147540985e-05,
"loss": 0.8713,
"step": 60
},
{
"epoch": 0.06,
"learning_rate": 2e-05,
"loss": 0.8037,
"step": 61
},
{
"epoch": 0.06,
"learning_rate": 1.9999987008898457e-05,
"loss": 0.8481,
"step": 62
},
{
"epoch": 0.06,
"learning_rate": 1.9999948035627574e-05,
"loss": 0.9219,
"step": 63
},
{
"epoch": 0.06,
"learning_rate": 1.9999883080288618e-05,
"loss": 1.0054,
"step": 64
},
{
"epoch": 0.06,
"learning_rate": 1.9999792143050352e-05,
"loss": 1.3275,
"step": 65
},
{
"epoch": 0.07,
"learning_rate": 1.9999675224149054e-05,
"loss": 1.6285,
"step": 66
},
{
"epoch": 0.07,
"learning_rate": 1.9999532323888507e-05,
"loss": 1.0542,
"step": 67
},
{
"epoch": 0.07,
"learning_rate": 1.9999363442639997e-05,
"loss": 0.8898,
"step": 68
},
{
"epoch": 0.07,
"learning_rate": 1.999916858084231e-05,
"loss": 0.9377,
"step": 69
},
{
"epoch": 0.07,
"learning_rate": 1.9998947739001743e-05,
"loss": 1.3403,
"step": 70
},
{
"epoch": 0.07,
"learning_rate": 1.999870091769209e-05,
"loss": 0.9542,
"step": 71
},
{
"epoch": 0.07,
"learning_rate": 1.999842811755465e-05,
"loss": 0.7805,
"step": 72
},
{
"epoch": 0.07,
"learning_rate": 1.9998129339298217e-05,
"loss": 0.9154,
"step": 73
},
{
"epoch": 0.07,
"learning_rate": 1.999780458369908e-05,
"loss": 0.8465,
"step": 74
},
{
"epoch": 0.07,
"learning_rate": 1.999745385160103e-05,
"loss": 0.8779,
"step": 75
},
{
"epoch": 0.08,
"learning_rate": 1.9997077143915345e-05,
"loss": 0.9247,
"step": 76
},
{
"epoch": 0.08,
"learning_rate": 1.999667446162079e-05,
"loss": 1.4597,
"step": 77
},
{
"epoch": 0.08,
"learning_rate": 1.9996245805763628e-05,
"loss": 0.8224,
"step": 78
},
{
"epoch": 0.08,
"learning_rate": 1.9995791177457598e-05,
"loss": 0.7869,
"step": 79
},
{
"epoch": 0.08,
"learning_rate": 1.9995310577883928e-05,
"loss": 0.7324,
"step": 80
},
{
"epoch": 0.08,
"learning_rate": 1.999480400829132e-05,
"loss": 0.8845,
"step": 81
},
{
"epoch": 0.08,
"learning_rate": 1.9994271469995953e-05,
"loss": 1.1086,
"step": 82
},
{
"epoch": 0.08,
"learning_rate": 1.9993712964381475e-05,
"loss": 0.9462,
"step": 83
},
{
"epoch": 0.08,
"learning_rate": 1.9993128492899012e-05,
"loss": 0.8624,
"step": 84
},
{
"epoch": 0.08,
"learning_rate": 1.999251805706715e-05,
"loss": 0.8404,
"step": 85
},
{
"epoch": 0.09,
"learning_rate": 1.999188165847193e-05,
"loss": 0.9484,
"step": 86
},
{
"epoch": 0.09,
"learning_rate": 1.9991219298766862e-05,
"loss": 1.0643,
"step": 87
},
{
"epoch": 0.09,
"learning_rate": 1.99905309796729e-05,
"loss": 0.6939,
"step": 88
},
{
"epoch": 0.09,
"learning_rate": 1.9989816702978447e-05,
"loss": 0.7965,
"step": 89
},
{
"epoch": 0.09,
"learning_rate": 1.998907647053935e-05,
"loss": 0.817,
"step": 90
},
{
"epoch": 0.09,
"learning_rate": 1.9988310284278904e-05,
"loss": 0.6953,
"step": 91
},
{
"epoch": 0.09,
"learning_rate": 1.9987518146187825e-05,
"loss": 1.1733,
"step": 92
},
{
"epoch": 0.09,
"learning_rate": 1.998670005832426e-05,
"loss": 0.826,
"step": 93
},
{
"epoch": 0.09,
"learning_rate": 1.998585602281378e-05,
"loss": 0.9648,
"step": 94
},
{
"epoch": 0.09,
"learning_rate": 1.9984986041849378e-05,
"loss": 0.9012,
"step": 95
},
{
"epoch": 0.1,
"learning_rate": 1.998409011769146e-05,
"loss": 1.3652,
"step": 96
},
{
"epoch": 0.1,
"learning_rate": 1.9983168252667832e-05,
"loss": 1.3381,
"step": 97
},
{
"epoch": 0.1,
"learning_rate": 1.99822204491737e-05,
"loss": 0.9126,
"step": 98
},
{
"epoch": 0.1,
"learning_rate": 1.9981246709671668e-05,
"loss": 0.9528,
"step": 99
},
{
"epoch": 0.1,
"learning_rate": 1.9980247036691723e-05,
"loss": 0.8905,
"step": 100
},
{
"epoch": 0.1,
"learning_rate": 1.997922143283124e-05,
"loss": 0.9624,
"step": 101
},
{
"epoch": 0.1,
"learning_rate": 1.997816990075496e-05,
"loss": 0.9044,
"step": 102
},
{
"epoch": 0.1,
"learning_rate": 1.9977092443194997e-05,
"loss": 0.8365,
"step": 103
},
{
"epoch": 0.1,
"learning_rate": 1.9975989062950828e-05,
"loss": 1.061,
"step": 104
},
{
"epoch": 0.1,
"learning_rate": 1.997485976288927e-05,
"loss": 0.737,
"step": 105
},
{
"epoch": 0.11,
"learning_rate": 1.9973704545944494e-05,
"loss": 0.8608,
"step": 106
},
{
"epoch": 0.11,
"learning_rate": 1.9972523415118012e-05,
"loss": 0.8978,
"step": 107
},
{
"epoch": 0.11,
"learning_rate": 1.997131637347866e-05,
"loss": 0.875,
"step": 108
},
{
"epoch": 0.11,
"learning_rate": 1.9970083424162598e-05,
"loss": 1.3394,
"step": 109
},
{
"epoch": 0.11,
"learning_rate": 1.99688245703733e-05,
"loss": 1.3804,
"step": 110
},
{
"epoch": 0.11,
"learning_rate": 1.996753981538155e-05,
"loss": 0.7778,
"step": 111
},
{
"epoch": 0.11,
"learning_rate": 1.9966229162525417e-05,
"loss": 0.9996,
"step": 112
},
{
"epoch": 0.11,
"learning_rate": 1.996489261521027e-05,
"loss": 1.0724,
"step": 113
},
{
"epoch": 0.11,
"learning_rate": 1.9963530176908752e-05,
"loss": 1.2232,
"step": 114
},
{
"epoch": 0.11,
"learning_rate": 1.9962141851160778e-05,
"loss": 0.8759,
"step": 115
},
{
"epoch": 0.12,
"learning_rate": 1.996072764157353e-05,
"loss": 1.0849,
"step": 116
},
{
"epoch": 0.12,
"learning_rate": 1.9959287551821425e-05,
"loss": 0.9536,
"step": 117
},
{
"epoch": 0.12,
"learning_rate": 1.9957821585646143e-05,
"loss": 1.3268,
"step": 118
},
{
"epoch": 0.12,
"learning_rate": 1.9956329746856583e-05,
"loss": 0.9561,
"step": 119
},
{
"epoch": 0.12,
"learning_rate": 1.9954812039328868e-05,
"loss": 0.8386,
"step": 120
},
{
"epoch": 0.12,
"learning_rate": 1.995326846700634e-05,
"loss": 2.018,
"step": 121
},
{
"epoch": 0.12,
"learning_rate": 1.9951699033899544e-05,
"loss": 0.9314,
"step": 122
},
{
"epoch": 0.12,
"learning_rate": 1.9950103744086205e-05,
"loss": 0.9947,
"step": 123
},
{
"epoch": 0.12,
"learning_rate": 1.9948482601711245e-05,
"loss": 0.9588,
"step": 124
},
{
"epoch": 0.12,
"learning_rate": 1.994683561098674e-05,
"loss": 1.3748,
"step": 125
},
{
"epoch": 0.13,
"learning_rate": 1.9945162776191946e-05,
"loss": 1.5894,
"step": 126
},
{
"epoch": 0.13,
"learning_rate": 1.9943464101673245e-05,
"loss": 1.2297,
"step": 127
},
{
"epoch": 0.13,
"learning_rate": 1.9941739591844173e-05,
"loss": 0.8366,
"step": 128
},
{
"epoch": 0.13,
"learning_rate": 1.9939989251185386e-05,
"loss": 0.817,
"step": 129
},
{
"epoch": 0.13,
"learning_rate": 1.9938213084244657e-05,
"loss": 0.7562,
"step": 130
},
{
"epoch": 0.13,
"learning_rate": 1.9936411095636855e-05,
"loss": 0.7552,
"step": 131
},
{
"epoch": 0.13,
"learning_rate": 1.993458329004395e-05,
"loss": 0.8523,
"step": 132
},
{
"epoch": 0.13,
"learning_rate": 1.9932729672214975e-05,
"loss": 0.8466,
"step": 133
},
{
"epoch": 0.13,
"learning_rate": 1.993085024696604e-05,
"loss": 0.914,
"step": 134
},
{
"epoch": 0.13,
"learning_rate": 1.992894501918031e-05,
"loss": 0.7882,
"step": 135
},
{
"epoch": 0.14,
"learning_rate": 1.9927013993807985e-05,
"loss": 1.0892,
"step": 136
},
{
"epoch": 0.14,
"learning_rate": 1.992505717586629e-05,
"loss": 0.8238,
"step": 137
},
{
"epoch": 0.14,
"learning_rate": 1.9923074570439476e-05,
"loss": 1.4146,
"step": 138
},
{
"epoch": 0.14,
"learning_rate": 1.992106618267878e-05,
"loss": 0.9236,
"step": 139
},
{
"epoch": 0.14,
"learning_rate": 1.9919032017802445e-05,
"loss": 0.8065,
"step": 140
},
{
"epoch": 0.14,
"learning_rate": 1.9916972081095672e-05,
"loss": 0.9458,
"step": 141
},
{
"epoch": 0.14,
"learning_rate": 1.9914886377910637e-05,
"loss": 0.898,
"step": 142
},
{
"epoch": 0.14,
"learning_rate": 1.991277491366645e-05,
"loss": 0.783,
"step": 143
},
{
"epoch": 0.14,
"learning_rate": 1.9910637693849166e-05,
"loss": 0.7972,
"step": 144
},
{
"epoch": 0.14,
"learning_rate": 1.9908474724011752e-05,
"loss": 1.448,
"step": 145
},
{
"epoch": 0.15,
"learning_rate": 1.9906286009774074e-05,
"loss": 0.9717,
"step": 146
},
{
"epoch": 0.15,
"learning_rate": 1.99040715568229e-05,
"loss": 0.7907,
"step": 147
},
{
"epoch": 0.15,
"learning_rate": 1.9901831370911866e-05,
"loss": 0.9144,
"step": 148
},
{
"epoch": 0.15,
"learning_rate": 1.9899565457861463e-05,
"loss": 0.835,
"step": 149
},
{
"epoch": 0.15,
"learning_rate": 1.989727382355904e-05,
"loss": 0.9172,
"step": 150
},
{
"epoch": 0.15,
"learning_rate": 1.9894956473958768e-05,
"loss": 0.9481,
"step": 151
},
{
"epoch": 0.15,
"learning_rate": 1.9892613415081623e-05,
"loss": 0.766,
"step": 152
},
{
"epoch": 0.15,
"learning_rate": 1.9890244653015397e-05,
"loss": 0.8229,
"step": 153
},
{
"epoch": 0.15,
"learning_rate": 1.988785019391465e-05,
"loss": 0.7748,
"step": 154
},
{
"epoch": 0.15,
"learning_rate": 1.9885430044000715e-05,
"loss": 0.8486,
"step": 155
},
{
"epoch": 0.16,
"learning_rate": 1.988298420956168e-05,
"loss": 0.8516,
"step": 156
},
{
"epoch": 0.16,
"learning_rate": 1.9880512696952356e-05,
"loss": 0.8075,
"step": 157
},
{
"epoch": 0.16,
"learning_rate": 1.9878015512594276e-05,
"loss": 0.6981,
"step": 158
},
{
"epoch": 0.16,
"learning_rate": 1.987549266297568e-05,
"loss": 1.5784,
"step": 159
},
{
"epoch": 0.16,
"learning_rate": 1.987294415465149e-05,
"loss": 0.7499,
"step": 160
},
{
"epoch": 0.16,
"learning_rate": 1.9870369994243283e-05,
"loss": 0.957,
"step": 161
},
{
"epoch": 0.16,
"learning_rate": 1.9867770188439302e-05,
"loss": 0.7369,
"step": 162
},
{
"epoch": 0.16,
"learning_rate": 1.986514474399441e-05,
"loss": 0.8588,
"step": 163
},
{
"epoch": 0.16,
"learning_rate": 1.986249366773009e-05,
"loss": 0.6999,
"step": 164
},
{
"epoch": 0.16,
"learning_rate": 1.985981696653443e-05,
"loss": 0.7267,
"step": 165
},
{
"epoch": 0.17,
"learning_rate": 1.9857114647362082e-05,
"loss": 0.8667,
"step": 166
},
{
"epoch": 0.17,
"learning_rate": 1.9854386717234265e-05,
"loss": 0.7941,
"step": 167
},
{
"epoch": 0.17,
"learning_rate": 1.985163318323875e-05,
"loss": 0.973,
"step": 168
},
{
"epoch": 0.17,
"learning_rate": 1.9848854052529822e-05,
"loss": 0.7735,
"step": 169
},
{
"epoch": 0.17,
"learning_rate": 1.984604933232827e-05,
"loss": 0.8267,
"step": 170
},
{
"epoch": 0.17,
"learning_rate": 1.9843219029921387e-05,
"loss": 0.9171,
"step": 171
},
{
"epoch": 0.17,
"learning_rate": 1.9840363152662906e-05,
"loss": 1.4763,
"step": 172
},
{
"epoch": 0.17,
"learning_rate": 1.9837481707973037e-05,
"loss": 0.9691,
"step": 173
},
{
"epoch": 0.17,
"learning_rate": 1.9834574703338406e-05,
"loss": 0.9034,
"step": 174
},
{
"epoch": 0.17,
"learning_rate": 1.9831642146312043e-05,
"loss": 0.887,
"step": 175
},
{
"epoch": 0.18,
"learning_rate": 1.9828684044513392e-05,
"loss": 0.983,
"step": 176
},
{
"epoch": 0.18,
"learning_rate": 1.9825700405628245e-05,
"loss": 0.831,
"step": 177
},
{
"epoch": 0.18,
"learning_rate": 1.9822691237408747e-05,
"loss": 0.7617,
"step": 178
},
{
"epoch": 0.18,
"learning_rate": 1.9819656547673393e-05,
"loss": 1.2565,
"step": 179
},
{
"epoch": 0.18,
"learning_rate": 1.9816596344306965e-05,
"loss": 1.4362,
"step": 180
},
{
"epoch": 0.18,
"learning_rate": 1.981351063526055e-05,
"loss": 1.2248,
"step": 181
},
{
"epoch": 0.18,
"learning_rate": 1.98103994285515e-05,
"loss": 0.9034,
"step": 182
},
{
"epoch": 0.18,
"learning_rate": 1.9807262732263417e-05,
"loss": 0.7415,
"step": 183
},
{
"epoch": 0.18,
"learning_rate": 1.9804100554546127e-05,
"loss": 0.832,
"step": 184
},
{
"epoch": 0.18,
"learning_rate": 1.980091290361566e-05,
"loss": 1.0681,
"step": 185
},
{
"epoch": 0.18,
"learning_rate": 1.9797699787754243e-05,
"loss": 0.8454,
"step": 186
},
{
"epoch": 0.19,
"learning_rate": 1.9794461215310258e-05,
"loss": 0.7861,
"step": 187
},
{
"epoch": 0.19,
"learning_rate": 1.979119719469822e-05,
"loss": 0.9102,
"step": 188
},
{
"epoch": 0.19,
"learning_rate": 1.9787907734398785e-05,
"loss": 0.71,
"step": 189
},
{
"epoch": 0.19,
"learning_rate": 1.9784592842958693e-05,
"loss": 0.9857,
"step": 190
},
{
"epoch": 0.19,
"learning_rate": 1.978125252899076e-05,
"loss": 0.7978,
"step": 191
},
{
"epoch": 0.19,
"learning_rate": 1.977788680117386e-05,
"loss": 0.829,
"step": 192
},
{
"epoch": 0.19,
"learning_rate": 1.977449566825289e-05,
"loss": 0.7571,
"step": 193
},
{
"epoch": 0.19,
"learning_rate": 1.9771079139038765e-05,
"loss": 0.7473,
"step": 194
},
{
"epoch": 0.19,
"learning_rate": 1.976763722240838e-05,
"loss": 0.7078,
"step": 195
},
{
"epoch": 0.19,
"learning_rate": 1.9764169927304594e-05,
"loss": 0.7293,
"step": 196
},
{
"epoch": 0.2,
"learning_rate": 1.9760677262736202e-05,
"loss": 0.8904,
"step": 197
},
{
"epoch": 0.2,
"learning_rate": 1.9757159237777916e-05,
"loss": 0.7294,
"step": 198
},
{
"epoch": 0.2,
"learning_rate": 1.9753615861570338e-05,
"loss": 0.7273,
"step": 199
},
{
"epoch": 0.2,
"learning_rate": 1.975004714331994e-05,
"loss": 0.6732,
"step": 200
},
{
"epoch": 0.2,
"learning_rate": 1.9746453092299042e-05,
"loss": 0.8208,
"step": 201
},
{
"epoch": 0.2,
"learning_rate": 1.974283371784578e-05,
"loss": 0.6795,
"step": 202
},
{
"epoch": 0.2,
"learning_rate": 1.9739189029364083e-05,
"loss": 0.8884,
"step": 203
},
{
"epoch": 0.2,
"learning_rate": 1.9735519036323656e-05,
"loss": 0.7186,
"step": 204
},
{
"epoch": 0.2,
"learning_rate": 1.9731823748259953e-05,
"loss": 0.7591,
"step": 205
},
{
"epoch": 0.2,
"learning_rate": 1.972810317477414e-05,
"loss": 0.7703,
"step": 206
},
{
"epoch": 0.21,
"learning_rate": 1.972435732553309e-05,
"loss": 0.9294,
"step": 207
},
{
"epoch": 0.21,
"learning_rate": 1.9720586210269347e-05,
"loss": 0.6991,
"step": 208
},
{
"epoch": 0.21,
"learning_rate": 1.9716789838781095e-05,
"loss": 0.662,
"step": 209
},
{
"epoch": 0.21,
"learning_rate": 1.9712968220932144e-05,
"loss": 0.6357,
"step": 210
},
{
"epoch": 0.21,
"learning_rate": 1.97091213666519e-05,
"loss": 0.6902,
"step": 211
},
{
"epoch": 0.21,
"learning_rate": 1.9705249285935344e-05,
"loss": 0.7826,
"step": 212
},
{
"epoch": 0.21,
"learning_rate": 1.9701351988842987e-05,
"loss": 0.6604,
"step": 213
},
{
"epoch": 0.21,
"learning_rate": 1.9697429485500862e-05,
"loss": 0.6934,
"step": 214
},
{
"epoch": 0.21,
"learning_rate": 1.9693481786100506e-05,
"loss": 0.7027,
"step": 215
},
{
"epoch": 0.21,
"learning_rate": 1.9689508900898907e-05,
"loss": 0.7562,
"step": 216
},
{
"epoch": 0.22,
"learning_rate": 1.9685510840218497e-05,
"loss": 0.7664,
"step": 217
},
{
"epoch": 0.22,
"learning_rate": 1.968148761444712e-05,
"loss": 0.5945,
"step": 218
},
{
"epoch": 0.22,
"learning_rate": 1.9677439234038004e-05,
"loss": 0.7876,
"step": 219
},
{
"epoch": 0.22,
"learning_rate": 1.9673365709509723e-05,
"loss": 0.6742,
"step": 220
},
{
"epoch": 0.22,
"learning_rate": 1.9669267051446208e-05,
"loss": 0.9253,
"step": 221
},
{
"epoch": 0.22,
"learning_rate": 1.9665143270496658e-05,
"loss": 0.6862,
"step": 222
},
{
"epoch": 0.22,
"learning_rate": 1.9660994377375578e-05,
"loss": 0.7811,
"step": 223
},
{
"epoch": 0.22,
"learning_rate": 1.96568203828627e-05,
"loss": 0.7367,
"step": 224
},
{
"epoch": 0.22,
"learning_rate": 1.965262129780298e-05,
"loss": 0.68,
"step": 225
},
{
"epoch": 0.22,
"learning_rate": 1.964839713310657e-05,
"loss": 0.7343,
"step": 226
},
{
"epoch": 0.23,
"learning_rate": 1.9644147899748784e-05,
"loss": 0.6716,
"step": 227
},
{
"epoch": 0.23,
"learning_rate": 1.963987360877005e-05,
"loss": 0.6795,
"step": 228
},
{
"epoch": 0.23,
"learning_rate": 1.963557427127594e-05,
"loss": 0.69,
"step": 229
},
{
"epoch": 0.23,
"learning_rate": 1.9631249898437066e-05,
"loss": 0.7206,
"step": 230
},
{
"epoch": 0.23,
"learning_rate": 1.9626900501489102e-05,
"loss": 0.679,
"step": 231
},
{
"epoch": 0.23,
"learning_rate": 1.9622526091732745e-05,
"loss": 0.7709,
"step": 232
},
{
"epoch": 0.23,
"learning_rate": 1.9618126680533672e-05,
"loss": 0.7466,
"step": 233
},
{
"epoch": 0.23,
"learning_rate": 1.9613702279322518e-05,
"loss": 0.6064,
"step": 234
},
{
"epoch": 0.23,
"learning_rate": 1.9609252899594863e-05,
"loss": 0.6615,
"step": 235
},
{
"epoch": 0.23,
"learning_rate": 1.9604778552911167e-05,
"loss": 0.7465,
"step": 236
},
{
"epoch": 0.24,
"learning_rate": 1.960027925089677e-05,
"loss": 0.7212,
"step": 237
},
{
"epoch": 0.24,
"learning_rate": 1.9595755005241853e-05,
"loss": 0.8214,
"step": 238
},
{
"epoch": 0.24,
"learning_rate": 1.95912058277014e-05,
"loss": 0.6861,
"step": 239
},
{
"epoch": 0.24,
"learning_rate": 1.9586631730095175e-05,
"loss": 0.7779,
"step": 240
},
{
"epoch": 0.24,
"learning_rate": 1.95820327243077e-05,
"loss": 0.686,
"step": 241
},
{
"epoch": 0.24,
"learning_rate": 1.9577408822288193e-05,
"loss": 0.679,
"step": 242
},
{
"epoch": 0.24,
"learning_rate": 1.9572760036050577e-05,
"loss": 0.7466,
"step": 243
},
{
"epoch": 0.24,
"learning_rate": 1.9568086377673422e-05,
"loss": 0.8037,
"step": 244
},
{
"epoch": 0.24,
"learning_rate": 1.956338785929992e-05,
"loss": 0.6762,
"step": 245
},
{
"epoch": 0.24,
"learning_rate": 1.9558664493137863e-05,
"loss": 0.8955,
"step": 246
},
{
"epoch": 0.25,
"learning_rate": 1.955391629145959e-05,
"loss": 0.7347,
"step": 247
},
{
"epoch": 0.25,
"learning_rate": 1.9549143266601977e-05,
"loss": 0.7446,
"step": 248
},
{
"epoch": 0.25,
"learning_rate": 1.9544345430966398e-05,
"loss": 0.7133,
"step": 249
},
{
"epoch": 0.25,
"learning_rate": 1.9539522797018682e-05,
"loss": 0.7182,
"step": 250
},
{
"epoch": 0.25,
"learning_rate": 1.9534675377289094e-05,
"loss": 0.6604,
"step": 251
},
{
"epoch": 0.25,
"learning_rate": 1.9529803184372302e-05,
"loss": 0.6004,
"step": 252
},
{
"epoch": 0.25,
"learning_rate": 1.9524906230927338e-05,
"loss": 0.6171,
"step": 253
},
{
"epoch": 0.25,
"learning_rate": 1.951998452967756e-05,
"loss": 0.6518,
"step": 254
},
{
"epoch": 0.25,
"learning_rate": 1.9515038093410633e-05,
"loss": 0.7039,
"step": 255
},
{
"epoch": 0.25,
"learning_rate": 1.9510066934978496e-05,
"loss": 0.679,
"step": 256
},
{
"epoch": 0.26,
"learning_rate": 1.9505071067297304e-05,
"loss": 0.7146,
"step": 257
},
{
"epoch": 0.26,
"learning_rate": 1.950005050334743e-05,
"loss": 0.6795,
"step": 258
},
{
"epoch": 0.26,
"learning_rate": 1.9495005256173398e-05,
"loss": 0.7601,
"step": 259
},
{
"epoch": 0.26,
"learning_rate": 1.9489935338883876e-05,
"loss": 0.641,
"step": 260
},
{
"epoch": 0.26,
"learning_rate": 1.9484840764651624e-05,
"loss": 0.6869,
"step": 261
},
{
"epoch": 0.26,
"learning_rate": 1.9479721546713472e-05,
"loss": 0.6668,
"step": 262
},
{
"epoch": 0.26,
"learning_rate": 1.947457769837027e-05,
"loss": 0.6938,
"step": 263
},
{
"epoch": 0.26,
"learning_rate": 1.9469409232986876e-05,
"loss": 0.6747,
"step": 264
},
{
"epoch": 0.26,
"learning_rate": 1.9464216163992097e-05,
"loss": 0.6927,
"step": 265
},
{
"epoch": 0.26,
"learning_rate": 1.945899850487867e-05,
"loss": 0.7093,
"step": 266
},
{
"epoch": 0.27,
"learning_rate": 1.9453756269203225e-05,
"loss": 0.6612,
"step": 267
},
{
"epoch": 0.27,
"learning_rate": 1.9448489470586245e-05,
"loss": 0.6994,
"step": 268
},
{
"epoch": 0.27,
"learning_rate": 1.9443198122712036e-05,
"loss": 0.6974,
"step": 269
},
{
"epoch": 0.27,
"learning_rate": 1.9437882239328675e-05,
"loss": 0.7176,
"step": 270
},
{
"epoch": 0.27,
"learning_rate": 1.9432541834248014e-05,
"loss": 0.6262,
"step": 271
},
{
"epoch": 0.27,
"learning_rate": 1.9427176921345587e-05,
"loss": 0.7263,
"step": 272
},
{
"epoch": 0.27,
"learning_rate": 1.9421787514560625e-05,
"loss": 0.6594,
"step": 273
},
{
"epoch": 0.27,
"learning_rate": 1.9416373627896002e-05,
"loss": 0.6773,
"step": 274
},
{
"epoch": 0.27,
"learning_rate": 1.9410935275418177e-05,
"loss": 0.7264,
"step": 275
},
{
"epoch": 0.27,
"learning_rate": 1.940547247125719e-05,
"loss": 0.7352,
"step": 276
},
{
"epoch": 0.28,
"learning_rate": 1.9399985229606616e-05,
"loss": 0.6537,
"step": 277
},
{
"epoch": 0.28,
"learning_rate": 1.9394473564723515e-05,
"loss": 0.673,
"step": 278
},
{
"epoch": 0.28,
"learning_rate": 1.9388937490928402e-05,
"loss": 0.6809,
"step": 279
},
{
"epoch": 0.28,
"learning_rate": 1.938337702260522e-05,
"loss": 0.6064,
"step": 280
},
{
"epoch": 0.28,
"learning_rate": 1.9377792174201295e-05,
"loss": 0.6943,
"step": 281
},
{
"epoch": 0.28,
"learning_rate": 1.9372182960227284e-05,
"loss": 0.7056,
"step": 282
},
{
"epoch": 0.28,
"learning_rate": 1.9366549395257167e-05,
"loss": 0.6422,
"step": 283
},
{
"epoch": 0.28,
"learning_rate": 1.9360891493928186e-05,
"loss": 0.7447,
"step": 284
},
{
"epoch": 0.28,
"learning_rate": 1.9355209270940817e-05,
"loss": 0.7322,
"step": 285
},
{
"epoch": 0.28,
"learning_rate": 1.934950274105872e-05,
"loss": 0.6957,
"step": 286
},
{
"epoch": 0.29,
"learning_rate": 1.9343771919108726e-05,
"loss": 0.6163,
"step": 287
},
{
"epoch": 0.29,
"learning_rate": 1.933801681998077e-05,
"loss": 0.6324,
"step": 288
},
{
"epoch": 0.29,
"learning_rate": 1.933223745862786e-05,
"loss": 0.7091,
"step": 289
},
{
"epoch": 0.29,
"learning_rate": 1.932643385006606e-05,
"loss": 0.6288,
"step": 290
},
{
"epoch": 0.29,
"learning_rate": 1.9320606009374418e-05,
"loss": 0.7056,
"step": 291
},
{
"epoch": 0.29,
"learning_rate": 1.9314753951694952e-05,
"loss": 0.6282,
"step": 292
},
{
"epoch": 0.29,
"learning_rate": 1.9308877692232592e-05,
"loss": 0.6591,
"step": 293
},
{
"epoch": 0.29,
"learning_rate": 1.930297724625516e-05,
"loss": 0.6914,
"step": 294
},
{
"epoch": 0.29,
"learning_rate": 1.9297052629093312e-05,
"loss": 0.6352,
"step": 295
},
{
"epoch": 0.29,
"learning_rate": 1.929110385614051e-05,
"loss": 0.6771,
"step": 296
},
{
"epoch": 0.3,
"learning_rate": 1.9285130942852975e-05,
"loss": 0.7305,
"step": 297
},
{
"epoch": 0.3,
"learning_rate": 1.927913390474965e-05,
"loss": 0.7441,
"step": 298
},
{
"epoch": 0.3,
"learning_rate": 1.9273112757412165e-05,
"loss": 0.6964,
"step": 299
},
{
"epoch": 0.3,
"learning_rate": 1.926706751648479e-05,
"loss": 0.6472,
"step": 300
},
{
"epoch": 0.3,
"learning_rate": 1.9260998197674385e-05,
"loss": 0.6361,
"step": 301
},
{
"epoch": 0.3,
"learning_rate": 1.9254904816750376e-05,
"loss": 0.689,
"step": 302
},
{
"epoch": 0.3,
"learning_rate": 1.9248787389544722e-05,
"loss": 0.6399,
"step": 303
},
{
"epoch": 0.3,
"learning_rate": 1.9242645931951833e-05,
"loss": 0.6407,
"step": 304
},
{
"epoch": 0.3,
"learning_rate": 1.9236480459928573e-05,
"loss": 0.7198,
"step": 305
},
{
"epoch": 0.3,
"learning_rate": 1.9230290989494203e-05,
"loss": 0.6972,
"step": 306
},
{
"epoch": 0.31,
"learning_rate": 1.9224077536730323e-05,
"loss": 0.6848,
"step": 307
},
{
"epoch": 0.31,
"learning_rate": 1.9217840117780857e-05,
"loss": 0.6266,
"step": 308
},
{
"epoch": 0.31,
"learning_rate": 1.921157874885199e-05,
"loss": 0.7203,
"step": 309
},
{
"epoch": 0.31,
"learning_rate": 1.9205293446212137e-05,
"loss": 0.6186,
"step": 310
},
{
"epoch": 0.31,
"learning_rate": 1.9198984226191905e-05,
"loss": 0.7296,
"step": 311
},
{
"epoch": 0.31,
"learning_rate": 1.9192651105184032e-05,
"loss": 0.6269,
"step": 312
},
{
"epoch": 0.31,
"learning_rate": 1.9186294099643367e-05,
"loss": 0.7291,
"step": 313
},
{
"epoch": 0.31,
"learning_rate": 1.91799132260868e-05,
"loss": 0.7009,
"step": 314
},
{
"epoch": 0.31,
"learning_rate": 1.917350850109326e-05,
"loss": 0.6599,
"step": 315
},
{
"epoch": 0.31,
"learning_rate": 1.916707994130363e-05,
"loss": 0.6374,
"step": 316
},
{
"epoch": 0.32,
"learning_rate": 1.9160627563420714e-05,
"loss": 0.6662,
"step": 317
},
{
"epoch": 0.32,
"learning_rate": 1.915415138420922e-05,
"loss": 0.6952,
"step": 318
},
{
"epoch": 0.32,
"learning_rate": 1.9147651420495696e-05,
"loss": 0.6496,
"step": 319
},
{
"epoch": 0.32,
"learning_rate": 1.9141127689168465e-05,
"loss": 0.6784,
"step": 320
},
{
"epoch": 0.32,
"learning_rate": 1.9134580207177625e-05,
"loss": 0.6576,
"step": 321
},
{
"epoch": 0.32,
"learning_rate": 1.9128008991534975e-05,
"loss": 0.6595,
"step": 322
},
{
"epoch": 0.32,
"learning_rate": 1.9121414059313985e-05,
"loss": 0.6332,
"step": 323
},
{
"epoch": 0.32,
"learning_rate": 1.9114795427649735e-05,
"loss": 0.6668,
"step": 324
},
{
"epoch": 0.32,
"learning_rate": 1.9108153113738896e-05,
"loss": 0.6232,
"step": 325
},
{
"epoch": 0.32,
"learning_rate": 1.9101487134839657e-05,
"loss": 0.6506,
"step": 326
},
{
"epoch": 0.33,
"learning_rate": 1.9094797508271702e-05,
"loss": 0.6441,
"step": 327
},
{
"epoch": 0.33,
"learning_rate": 1.9088084251416154e-05,
"loss": 0.6531,
"step": 328
},
{
"epoch": 0.33,
"learning_rate": 1.9081347381715535e-05,
"loss": 0.6683,
"step": 329
},
{
"epoch": 0.33,
"learning_rate": 1.9074586916673716e-05,
"loss": 0.6775,
"step": 330
},
{
"epoch": 0.33,
"learning_rate": 1.9067802873855875e-05,
"loss": 0.6849,
"step": 331
},
{
"epoch": 0.33,
"learning_rate": 1.9060995270888444e-05,
"loss": 0.6931,
"step": 332
},
{
"epoch": 0.33,
"learning_rate": 1.9054164125459084e-05,
"loss": 0.6496,
"step": 333
},
{
"epoch": 0.33,
"learning_rate": 1.904730945531661e-05,
"loss": 0.66,
"step": 334
},
{
"epoch": 0.33,
"learning_rate": 1.9040431278270967e-05,
"loss": 0.6834,
"step": 335
},
{
"epoch": 0.33,
"learning_rate": 1.9033529612193177e-05,
"loss": 0.7538,
"step": 336
},
{
"epoch": 0.34,
"learning_rate": 1.9026604475015283e-05,
"loss": 0.5976,
"step": 337
},
{
"epoch": 0.34,
"learning_rate": 1.901965588473032e-05,
"loss": 0.6549,
"step": 338
},
{
"epoch": 0.34,
"learning_rate": 1.901268385939226e-05,
"loss": 0.6582,
"step": 339
},
{
"epoch": 0.34,
"learning_rate": 1.9005688417115954e-05,
"loss": 0.6617,
"step": 340
},
{
"epoch": 0.34,
"learning_rate": 1.899866957607711e-05,
"loss": 0.6522,
"step": 341
},
{
"epoch": 0.34,
"learning_rate": 1.8991627354512213e-05,
"loss": 0.976,
"step": 342
},
{
"epoch": 0.34,
"learning_rate": 1.898456177071852e-05,
"loss": 0.6434,
"step": 343
},
{
"epoch": 0.34,
"learning_rate": 1.8977472843053962e-05,
"loss": 0.676,
"step": 344
},
{
"epoch": 0.34,
"learning_rate": 1.8970360589937138e-05,
"loss": 0.7308,
"step": 345
},
{
"epoch": 0.34,
"learning_rate": 1.8963225029847252e-05,
"loss": 0.5997,
"step": 346
},
{
"epoch": 0.35,
"learning_rate": 1.895606618132406e-05,
"loss": 0.7292,
"step": 347
},
{
"epoch": 0.35,
"learning_rate": 1.8948884062967823e-05,
"loss": 0.6467,
"step": 348
},
{
"epoch": 0.35,
"learning_rate": 1.8941678693439272e-05,
"loss": 0.5916,
"step": 349
},
{
"epoch": 0.35,
"learning_rate": 1.8934450091459544e-05,
"loss": 0.6801,
"step": 350
},
{
"epoch": 0.35,
"learning_rate": 1.892719827581014e-05,
"loss": 0.6193,
"step": 351
},
{
"epoch": 0.35,
"learning_rate": 1.8919923265332865e-05,
"loss": 0.7392,
"step": 352
},
{
"epoch": 0.35,
"learning_rate": 1.8912625078929814e-05,
"loss": 0.7519,
"step": 353
},
{
"epoch": 0.35,
"learning_rate": 1.8905303735563274e-05,
"loss": 0.6561,
"step": 354
},
{
"epoch": 0.35,
"learning_rate": 1.8897959254255715e-05,
"loss": 0.7031,
"step": 355
},
{
"epoch": 0.35,
"learning_rate": 1.8890591654089705e-05,
"loss": 0.6356,
"step": 356
},
{
"epoch": 0.36,
"learning_rate": 1.8883200954207903e-05,
"loss": 0.6446,
"step": 357
},
{
"epoch": 0.36,
"learning_rate": 1.887578717381297e-05,
"loss": 0.6155,
"step": 358
},
{
"epoch": 0.36,
"learning_rate": 1.886835033216755e-05,
"loss": 0.7146,
"step": 359
},
{
"epoch": 0.36,
"learning_rate": 1.8860890448594182e-05,
"loss": 0.6414,
"step": 360
},
{
"epoch": 0.36,
"learning_rate": 1.88534075424753e-05,
"loss": 0.6314,
"step": 361
},
{
"epoch": 0.36,
"learning_rate": 1.8845901633253132e-05,
"loss": 0.7255,
"step": 362
},
{
"epoch": 0.36,
"learning_rate": 1.8838372740429693e-05,
"loss": 0.6845,
"step": 363
},
{
"epoch": 0.36,
"learning_rate": 1.88308208835667e-05,
"loss": 0.6271,
"step": 364
},
{
"epoch": 0.36,
"learning_rate": 1.882324608228554e-05,
"loss": 0.6901,
"step": 365
},
{
"epoch": 0.36,
"learning_rate": 1.881564835626722e-05,
"loss": 0.6465,
"step": 366
},
{
"epoch": 0.36,
"learning_rate": 1.8808027725252297e-05,
"loss": 0.6494,
"step": 367
},
{
"epoch": 0.37,
"learning_rate": 1.8800384209040864e-05,
"loss": 0.6207,
"step": 368
},
{
"epoch": 0.37,
"learning_rate": 1.8792717827492446e-05,
"loss": 0.6646,
"step": 369
},
{
"epoch": 0.37,
"learning_rate": 1.8785028600526e-05,
"loss": 0.657,
"step": 370
},
{
"epoch": 0.37,
"learning_rate": 1.877731654811983e-05,
"loss": 0.7176,
"step": 371
},
{
"epoch": 0.37,
"learning_rate": 1.876958169031154e-05,
"loss": 0.6358,
"step": 372
},
{
"epoch": 0.37,
"learning_rate": 1.8761824047198003e-05,
"loss": 0.7008,
"step": 373
},
{
"epoch": 0.37,
"learning_rate": 1.8754043638935283e-05,
"loss": 0.6676,
"step": 374
},
{
"epoch": 0.37,
"learning_rate": 1.8746240485738595e-05,
"loss": 0.6825,
"step": 375
},
{
"epoch": 0.37,
"learning_rate": 1.873841460788225e-05,
"loss": 0.6394,
"step": 376
},
{
"epoch": 0.37,
"learning_rate": 1.87305660256996e-05,
"loss": 0.6404,
"step": 377
},
{
"epoch": 0.38,
"learning_rate": 1.8722694759582992e-05,
"loss": 0.6467,
"step": 378
},
{
"epoch": 0.38,
"learning_rate": 1.871480082998371e-05,
"loss": 0.6852,
"step": 379
},
{
"epoch": 0.38,
"learning_rate": 1.8706884257411925e-05,
"loss": 0.615,
"step": 380
},
{
"epoch": 0.38,
"learning_rate": 1.8698945062436633e-05,
"loss": 0.6319,
"step": 381
},
{
"epoch": 0.38,
"learning_rate": 1.869098326568561e-05,
"loss": 0.6404,
"step": 382
},
{
"epoch": 0.38,
"learning_rate": 1.868299888784536e-05,
"loss": 0.6251,
"step": 383
},
{
"epoch": 0.38,
"learning_rate": 1.867499194966106e-05,
"loss": 0.6187,
"step": 384
},
{
"epoch": 0.38,
"learning_rate": 1.8666962471936498e-05,
"loss": 0.6202,
"step": 385
},
{
"epoch": 0.38,
"learning_rate": 1.8658910475534016e-05,
"loss": 0.6843,
"step": 386
},
{
"epoch": 0.38,
"learning_rate": 1.865083598137449e-05,
"loss": 0.593,
"step": 387
},
{
"epoch": 0.39,
"learning_rate": 1.864273901043722e-05,
"loss": 0.6261,
"step": 388
},
{
"epoch": 0.39,
"learning_rate": 1.8634619583759933e-05,
"loss": 0.6289,
"step": 389
},
{
"epoch": 0.39,
"learning_rate": 1.862647772243868e-05,
"loss": 0.7258,
"step": 390
},
{
"epoch": 0.39,
"learning_rate": 1.8618313447627815e-05,
"loss": 0.5747,
"step": 391
},
{
"epoch": 0.39,
"learning_rate": 1.861012678053992e-05,
"loss": 0.6668,
"step": 392
},
{
"epoch": 0.39,
"learning_rate": 1.860191774244576e-05,
"loss": 0.6911,
"step": 393
},
{
"epoch": 0.39,
"learning_rate": 1.8593686354674223e-05,
"loss": 0.6535,
"step": 394
},
{
"epoch": 0.39,
"learning_rate": 1.8585432638612272e-05,
"loss": 0.6716,
"step": 395
},
{
"epoch": 0.39,
"learning_rate": 1.8577156615704877e-05,
"loss": 0.6456,
"step": 396
},
{
"epoch": 0.39,
"learning_rate": 1.856885830745497e-05,
"loss": 0.6516,
"step": 397
},
{
"epoch": 0.4,
"learning_rate": 1.856053773542338e-05,
"loss": 0.6124,
"step": 398
},
{
"epoch": 0.4,
"learning_rate": 1.8552194921228793e-05,
"loss": 0.6507,
"step": 399
},
{
"epoch": 0.4,
"learning_rate": 1.8543829886547674e-05,
"loss": 0.6223,
"step": 400
},
{
"epoch": 0.4,
"learning_rate": 1.8535442653114228e-05,
"loss": 0.6274,
"step": 401
},
{
"epoch": 0.4,
"learning_rate": 1.8527033242720328e-05,
"loss": 0.6839,
"step": 402
},
{
"epoch": 0.4,
"learning_rate": 1.8518601677215488e-05,
"loss": 0.675,
"step": 403
},
{
"epoch": 0.4,
"learning_rate": 1.851014797850676e-05,
"loss": 0.657,
"step": 404
},
{
"epoch": 0.4,
"learning_rate": 1.8501672168558726e-05,
"loss": 0.695,
"step": 405
},
{
"epoch": 0.4,
"learning_rate": 1.84931742693934e-05,
"loss": 0.6864,
"step": 406
},
{
"epoch": 0.4,
"learning_rate": 1.84846543030902e-05,
"loss": 0.6229,
"step": 407
},
{
"epoch": 0.41,
"learning_rate": 1.8476112291785876e-05,
"loss": 0.6588,
"step": 408
},
{
"epoch": 0.41,
"learning_rate": 1.8467548257674453e-05,
"loss": 0.6368,
"step": 409
},
{
"epoch": 0.41,
"learning_rate": 1.8458962223007177e-05,
"loss": 0.6543,
"step": 410
},
{
"epoch": 0.41,
"learning_rate": 1.845035421009246e-05,
"loss": 0.6985,
"step": 411
},
{
"epoch": 0.41,
"learning_rate": 1.844172424129582e-05,
"loss": 0.6677,
"step": 412
},
{
"epoch": 0.41,
"learning_rate": 1.843307233903981e-05,
"loss": 0.7123,
"step": 413
},
{
"epoch": 0.41,
"learning_rate": 1.8424398525803983e-05,
"loss": 0.6168,
"step": 414
},
{
"epoch": 0.41,
"learning_rate": 1.8415702824124812e-05,
"loss": 0.7392,
"step": 415
},
{
"epoch": 0.41,
"learning_rate": 1.8406985256595652e-05,
"loss": 0.6636,
"step": 416
},
{
"epoch": 0.41,
"learning_rate": 1.8398245845866657e-05,
"loss": 0.6216,
"step": 417
},
{
"epoch": 0.42,
"learning_rate": 1.8389484614644746e-05,
"loss": 0.6685,
"step": 418
},
{
"epoch": 0.42,
"learning_rate": 1.8380701585693526e-05,
"loss": 0.5938,
"step": 419
},
{
"epoch": 0.42,
"learning_rate": 1.8371896781833242e-05,
"loss": 0.6134,
"step": 420
},
{
"epoch": 0.42,
"learning_rate": 1.8363070225940714e-05,
"loss": 0.637,
"step": 421
},
{
"epoch": 0.42,
"learning_rate": 1.8354221940949282e-05,
"loss": 0.8702,
"step": 422
},
{
"epoch": 0.42,
"learning_rate": 1.8345351949848735e-05,
"loss": 0.7037,
"step": 423
},
{
"epoch": 0.42,
"learning_rate": 1.8336460275685267e-05,
"loss": 0.6907,
"step": 424
},
{
"epoch": 0.42,
"learning_rate": 1.8327546941561403e-05,
"loss": 0.7027,
"step": 425
},
{
"epoch": 0.42,
"learning_rate": 1.8318611970635953e-05,
"loss": 0.6431,
"step": 426
},
{
"epoch": 0.42,
"learning_rate": 1.8309655386123935e-05,
"loss": 0.7247,
"step": 427
},
{
"epoch": 0.43,
"learning_rate": 1.8300677211296532e-05,
"loss": 0.6558,
"step": 428
},
{
"epoch": 0.43,
"learning_rate": 1.8291677469481025e-05,
"loss": 0.7563,
"step": 429
},
{
"epoch": 0.43,
"learning_rate": 1.8282656184060713e-05,
"loss": 0.6238,
"step": 430
},
{
"epoch": 0.43,
"learning_rate": 1.82736133784749e-05,
"loss": 0.6946,
"step": 431
},
{
"epoch": 0.43,
"learning_rate": 1.826454907621877e-05,
"loss": 0.6227,
"step": 432
},
{
"epoch": 0.43,
"learning_rate": 1.825546330084339e-05,
"loss": 0.6576,
"step": 433
},
{
"epoch": 0.43,
"learning_rate": 1.8246356075955594e-05,
"loss": 0.6126,
"step": 434
},
{
"epoch": 0.43,
"learning_rate": 1.8237227425217976e-05,
"loss": 0.7517,
"step": 435
},
{
"epoch": 0.43,
"learning_rate": 1.8228077372348765e-05,
"loss": 0.6504,
"step": 436
},
{
"epoch": 0.43,
"learning_rate": 1.8218905941121824e-05,
"loss": 0.6934,
"step": 437
},
{
"epoch": 0.44,
"learning_rate": 1.8209713155366546e-05,
"loss": 0.614,
"step": 438
},
{
"epoch": 0.44,
"learning_rate": 1.820049903896782e-05,
"loss": 0.6809,
"step": 439
},
{
"epoch": 0.44,
"learning_rate": 1.8191263615865942e-05,
"loss": 0.6117,
"step": 440
},
{
"epoch": 0.44,
"learning_rate": 1.8182006910056582e-05,
"loss": 0.62,
"step": 441
},
{
"epoch": 0.44,
"learning_rate": 1.8172728945590702e-05,
"loss": 0.7094,
"step": 442
},
{
"epoch": 0.44,
"learning_rate": 1.8163429746574493e-05,
"loss": 0.6945,
"step": 443
},
{
"epoch": 0.44,
"learning_rate": 1.8154109337169326e-05,
"loss": 0.6693,
"step": 444
},
{
"epoch": 0.44,
"learning_rate": 1.8144767741591676e-05,
"loss": 0.5903,
"step": 445
},
{
"epoch": 0.44,
"learning_rate": 1.8135404984113067e-05,
"loss": 0.643,
"step": 446
},
{
"epoch": 0.44,
"learning_rate": 1.812602108906001e-05,
"loss": 0.6264,
"step": 447
},
{
"epoch": 0.45,
"learning_rate": 1.8116616080813925e-05,
"loss": 0.6724,
"step": 448
},
{
"epoch": 0.45,
"learning_rate": 1.8107189983811094e-05,
"loss": 0.6087,
"step": 449
},
{
"epoch": 0.45,
"learning_rate": 1.8097742822542605e-05,
"loss": 0.6531,
"step": 450
},
{
"epoch": 0.45,
"learning_rate": 1.808827462155425e-05,
"loss": 0.5679,
"step": 451
},
{
"epoch": 0.45,
"learning_rate": 1.8078785405446516e-05,
"loss": 0.7,
"step": 452
},
{
"epoch": 0.45,
"learning_rate": 1.8069275198874464e-05,
"loss": 0.6416,
"step": 453
},
{
"epoch": 0.45,
"learning_rate": 1.8059744026547713e-05,
"loss": 0.6474,
"step": 454
},
{
"epoch": 0.45,
"learning_rate": 1.8050191913230345e-05,
"loss": 0.6297,
"step": 455
},
{
"epoch": 0.45,
"learning_rate": 1.804061888374086e-05,
"loss": 0.6923,
"step": 456
},
{
"epoch": 0.45,
"learning_rate": 1.803102496295209e-05,
"loss": 0.6188,
"step": 457
},
{
"epoch": 0.46,
"learning_rate": 1.802141017579116e-05,
"loss": 0.5695,
"step": 458
},
{
"epoch": 0.46,
"learning_rate": 1.8011774547239403e-05,
"loss": 0.6514,
"step": 459
},
{
"epoch": 0.46,
"learning_rate": 1.8002118102332308e-05,
"loss": 0.6491,
"step": 460
},
{
"epoch": 0.46,
"learning_rate": 1.7992440866159443e-05,
"loss": 0.68,
"step": 461
},
{
"epoch": 0.46,
"learning_rate": 1.79827428638644e-05,
"loss": 0.6495,
"step": 462
},
{
"epoch": 0.46,
"learning_rate": 1.797302412064473e-05,
"loss": 0.6987,
"step": 463
},
{
"epoch": 0.46,
"learning_rate": 1.796328466175186e-05,
"loss": 0.6891,
"step": 464
},
{
"epoch": 0.46,
"learning_rate": 1.795352451249106e-05,
"loss": 0.6765,
"step": 465
},
{
"epoch": 0.46,
"learning_rate": 1.7943743698221337e-05,
"loss": 0.6371,
"step": 466
},
{
"epoch": 0.46,
"learning_rate": 1.7933942244355415e-05,
"loss": 0.6979,
"step": 467
},
{
"epoch": 0.47,
"learning_rate": 1.792412017635962e-05,
"loss": 0.6372,
"step": 468
},
{
"epoch": 0.47,
"learning_rate": 1.791427751975385e-05,
"loss": 0.6674,
"step": 469
},
{
"epoch": 0.47,
"learning_rate": 1.7904414300111497e-05,
"loss": 0.6552,
"step": 470
},
{
"epoch": 0.47,
"learning_rate": 1.789453054305938e-05,
"loss": 0.6788,
"step": 471
},
{
"epoch": 0.47,
"learning_rate": 1.7884626274277674e-05,
"loss": 0.7142,
"step": 472
},
{
"epoch": 0.47,
"learning_rate": 1.7874701519499855e-05,
"loss": 0.5597,
"step": 473
},
{
"epoch": 0.47,
"learning_rate": 1.786475630451262e-05,
"loss": 0.6239,
"step": 474
},
{
"epoch": 0.47,
"learning_rate": 1.7854790655155827e-05,
"loss": 0.6677,
"step": 475
},
{
"epoch": 0.47,
"learning_rate": 1.784480459732243e-05,
"loss": 0.6527,
"step": 476
},
{
"epoch": 0.47,
"learning_rate": 1.783479815695841e-05,
"loss": 0.6342,
"step": 477
},
{
"epoch": 0.48,
"learning_rate": 1.78247713600627e-05,
"loss": 0.6251,
"step": 478
},
{
"epoch": 0.48,
"learning_rate": 1.781472423268713e-05,
"loss": 0.6716,
"step": 479
},
{
"epoch": 0.48,
"learning_rate": 1.780465680093635e-05,
"loss": 0.6483,
"step": 480
},
{
"epoch": 0.48,
"learning_rate": 1.7794569090967763e-05,
"loss": 0.6613,
"step": 481
},
{
"epoch": 0.48,
"learning_rate": 1.7784461128991465e-05,
"loss": 0.64,
"step": 482
},
{
"epoch": 0.48,
"learning_rate": 1.7774332941270165e-05,
"loss": 0.6742,
"step": 483
},
{
"epoch": 0.48,
"learning_rate": 1.776418455411913e-05,
"loss": 0.6902,
"step": 484
},
{
"epoch": 0.48,
"learning_rate": 1.7754015993906103e-05,
"loss": 0.6196,
"step": 485
},
{
"epoch": 0.48,
"learning_rate": 1.774382728705124e-05,
"loss": 0.6905,
"step": 486
},
{
"epoch": 0.48,
"learning_rate": 1.7733618460027055e-05,
"loss": 0.6993,
"step": 487
},
{
"epoch": 0.49,
"learning_rate": 1.7723389539358323e-05,
"loss": 0.5793,
"step": 488
},
{
"epoch": 0.49,
"learning_rate": 1.7713140551622032e-05,
"loss": 0.627,
"step": 489
},
{
"epoch": 0.49,
"learning_rate": 1.7702871523447317e-05,
"loss": 0.6394,
"step": 490
},
{
"epoch": 0.49,
"learning_rate": 1.7692582481515372e-05,
"loss": 0.6372,
"step": 491
},
{
"epoch": 0.49,
"learning_rate": 1.768227345255939e-05,
"loss": 0.6464,
"step": 492
},
{
"epoch": 0.49,
"learning_rate": 1.767194446336451e-05,
"loss": 0.6816,
"step": 493
},
{
"epoch": 0.49,
"learning_rate": 1.7661595540767714e-05,
"loss": 0.6807,
"step": 494
},
{
"epoch": 0.49,
"learning_rate": 1.7651226711657786e-05,
"loss": 0.661,
"step": 495
},
{
"epoch": 0.49,
"learning_rate": 1.7640838002975223e-05,
"loss": 0.5681,
"step": 496
},
{
"epoch": 0.49,
"learning_rate": 1.7630429441712184e-05,
"loss": 0.5885,
"step": 497
},
{
"epoch": 0.5,
"learning_rate": 1.76200010549124e-05,
"loss": 0.6487,
"step": 498
},
{
"epoch": 0.5,
"learning_rate": 1.7609552869671126e-05,
"loss": 0.6879,
"step": 499
},
{
"epoch": 0.5,
"learning_rate": 1.7599084913135038e-05,
"loss": 0.6085,
"step": 500
},
{
"epoch": 0.5,
"learning_rate": 1.7588597212502204e-05,
"loss": 0.6497,
"step": 501
},
{
"epoch": 0.5,
"learning_rate": 1.757808979502197e-05,
"loss": 0.6377,
"step": 502
},
{
"epoch": 0.5,
"learning_rate": 1.756756268799493e-05,
"loss": 0.6577,
"step": 503
},
{
"epoch": 0.5,
"learning_rate": 1.7557015918772822e-05,
"loss": 0.59,
"step": 504
},
{
"epoch": 0.5,
"learning_rate": 1.754644951475848e-05,
"loss": 0.6151,
"step": 505
},
{
"epoch": 0.5,
"learning_rate": 1.7535863503405743e-05,
"loss": 0.6877,
"step": 506
},
{
"epoch": 0.5,
"learning_rate": 1.7525257912219412e-05,
"loss": 0.6664,
"step": 507
},
{
"epoch": 0.51,
"learning_rate": 1.751463276875514e-05,
"loss": 0.6925,
"step": 508
},
{
"epoch": 0.51,
"learning_rate": 1.750398810061939e-05,
"loss": 0.7353,
"step": 509
},
{
"epoch": 0.51,
"learning_rate": 1.7493323935469364e-05,
"loss": 0.6527,
"step": 510
},
{
"epoch": 0.51,
"learning_rate": 1.7482640301012904e-05,
"loss": 0.6653,
"step": 511
},
{
"epoch": 0.51,
"learning_rate": 1.7471937225008447e-05,
"loss": 0.6105,
"step": 512
},
{
"epoch": 0.51,
"learning_rate": 1.7461214735264947e-05,
"loss": 0.663,
"step": 513
},
{
"epoch": 0.51,
"learning_rate": 1.745047285964179e-05,
"loss": 0.6139,
"step": 514
},
{
"epoch": 0.51,
"learning_rate": 1.7439711626048738e-05,
"loss": 0.7019,
"step": 515
},
{
"epoch": 0.51,
"learning_rate": 1.7428931062445844e-05,
"loss": 0.5709,
"step": 516
},
{
"epoch": 0.51,
"learning_rate": 1.741813119684339e-05,
"loss": 0.6271,
"step": 517
},
{
"epoch": 0.52,
"learning_rate": 1.7407312057301807e-05,
"loss": 0.6192,
"step": 518
},
{
"epoch": 0.52,
"learning_rate": 1.7396473671931597e-05,
"loss": 0.5847,
"step": 519
},
{
"epoch": 0.52,
"learning_rate": 1.738561606889328e-05,
"loss": 0.6313,
"step": 520
},
{
"epoch": 0.52,
"learning_rate": 1.73747392763973e-05,
"loss": 0.6231,
"step": 521
},
{
"epoch": 0.52,
"learning_rate": 1.7363843322703956e-05,
"loss": 0.5879,
"step": 522
},
{
"epoch": 0.52,
"learning_rate": 1.7352928236123337e-05,
"loss": 0.6387,
"step": 523
},
{
"epoch": 0.52,
"learning_rate": 1.7341994045015245e-05,
"loss": 0.6781,
"step": 524
},
{
"epoch": 0.52,
"learning_rate": 1.7331040777789122e-05,
"loss": 0.6501,
"step": 525
},
{
"epoch": 0.52,
"learning_rate": 1.732006846290396e-05,
"loss": 0.6286,
"step": 526
},
{
"epoch": 0.52,
"learning_rate": 1.730907712886825e-05,
"loss": 0.6595,
"step": 527
},
{
"epoch": 0.53,
"learning_rate": 1.7298066804239904e-05,
"loss": 0.604,
"step": 528
},
{
"epoch": 0.53,
"learning_rate": 1.7287037517626174e-05,
"loss": 0.6222,
"step": 529
},
{
"epoch": 0.53,
"learning_rate": 1.7275989297683575e-05,
"loss": 0.6315,
"step": 530
},
{
"epoch": 0.53,
"learning_rate": 1.726492217311781e-05,
"loss": 0.6157,
"step": 531
},
{
"epoch": 0.53,
"learning_rate": 1.725383617268371e-05,
"loss": 0.6616,
"step": 532
},
{
"epoch": 0.53,
"learning_rate": 1.7242731325185152e-05,
"loss": 0.5931,
"step": 533
},
{
"epoch": 0.53,
"learning_rate": 1.7231607659474972e-05,
"loss": 0.658,
"step": 534
},
{
"epoch": 0.53,
"learning_rate": 1.7220465204454902e-05,
"loss": 0.6186,
"step": 535
},
{
"epoch": 0.53,
"learning_rate": 1.7209303989075497e-05,
"loss": 0.5955,
"step": 536
},
{
"epoch": 0.53,
"learning_rate": 1.7198124042336055e-05,
"loss": 0.6567,
"step": 537
},
{
"epoch": 0.54,
"learning_rate": 1.7186925393284538e-05,
"loss": 0.6332,
"step": 538
},
{
"epoch": 0.54,
"learning_rate": 1.7175708071017503e-05,
"loss": 0.628,
"step": 539
},
{
"epoch": 0.54,
"learning_rate": 1.7164472104680026e-05,
"loss": 0.6144,
"step": 540
},
{
"epoch": 0.54,
"learning_rate": 1.715321752346563e-05,
"loss": 0.664,
"step": 541
},
{
"epoch": 0.54,
"learning_rate": 1.7141944356616185e-05,
"loss": 0.6983,
"step": 542
},
{
"epoch": 0.54,
"learning_rate": 1.7130652633421864e-05,
"loss": 0.6177,
"step": 543
},
{
"epoch": 0.54,
"learning_rate": 1.7119342383221055e-05,
"loss": 0.6634,
"step": 544
},
{
"epoch": 0.54,
"learning_rate": 1.7108013635400283e-05,
"loss": 0.6421,
"step": 545
},
{
"epoch": 0.54,
"learning_rate": 1.7096666419394122e-05,
"loss": 0.6453,
"step": 546
},
{
"epoch": 0.54,
"learning_rate": 1.7085300764685147e-05,
"loss": 0.6599,
"step": 547
},
{
"epoch": 0.55,
"learning_rate": 1.707391670080383e-05,
"loss": 0.7101,
"step": 548
},
{
"epoch": 0.55,
"learning_rate": 1.7062514257328474e-05,
"loss": 0.6757,
"step": 549
},
{
"epoch": 0.55,
"learning_rate": 1.705109346388514e-05,
"loss": 0.6949,
"step": 550
},
{
"epoch": 0.55,
"learning_rate": 1.703965435014757e-05,
"loss": 0.6671,
"step": 551
},
{
"epoch": 0.55,
"learning_rate": 1.7028196945837097e-05,
"loss": 0.6966,
"step": 552
},
{
"epoch": 0.55,
"learning_rate": 1.7016721280722585e-05,
"loss": 0.6081,
"step": 553
},
{
"epoch": 0.55,
"learning_rate": 1.7005227384620336e-05,
"loss": 0.6207,
"step": 554
},
{
"epoch": 0.55,
"learning_rate": 1.699371528739403e-05,
"loss": 0.569,
"step": 555
},
{
"epoch": 0.55,
"learning_rate": 1.698218501895463e-05,
"loss": 0.6214,
"step": 556
},
{
"epoch": 0.55,
"learning_rate": 1.6970636609260308e-05,
"loss": 0.6936,
"step": 557
},
{
"epoch": 0.55,
"learning_rate": 1.6959070088316384e-05,
"loss": 0.6793,
"step": 558
},
{
"epoch": 0.56,
"learning_rate": 1.6947485486175223e-05,
"loss": 0.587,
"step": 559
},
{
"epoch": 0.56,
"learning_rate": 1.6935882832936177e-05,
"loss": 0.6241,
"step": 560
},
{
"epoch": 0.56,
"learning_rate": 1.6924262158745493e-05,
"loss": 0.6018,
"step": 561
},
{
"epoch": 0.56,
"learning_rate": 1.691262349379624e-05,
"loss": 0.6667,
"step": 562
},
{
"epoch": 0.56,
"learning_rate": 1.6900966868328236e-05,
"loss": 0.666,
"step": 563
},
{
"epoch": 0.56,
"learning_rate": 1.688929231262797e-05,
"loss": 0.6603,
"step": 564
},
{
"epoch": 0.56,
"learning_rate": 1.6877599857028495e-05,
"loss": 0.6142,
"step": 565
},
{
"epoch": 0.56,
"learning_rate": 1.6865889531909397e-05,
"loss": 0.5954,
"step": 566
},
{
"epoch": 0.56,
"learning_rate": 1.685416136769668e-05,
"loss": 0.6811,
"step": 567
},
{
"epoch": 0.56,
"learning_rate": 1.684241539486269e-05,
"loss": 0.6814,
"step": 568
},
{
"epoch": 0.57,
"learning_rate": 1.683065164392606e-05,
"loss": 0.7413,
"step": 569
},
{
"epoch": 0.57,
"learning_rate": 1.681887014545161e-05,
"loss": 0.6803,
"step": 570
},
{
"epoch": 0.57,
"learning_rate": 1.680707093005026e-05,
"loss": 0.707,
"step": 571
},
{
"epoch": 0.57,
"learning_rate": 1.679525402837897e-05,
"loss": 0.6282,
"step": 572
},
{
"epoch": 0.57,
"learning_rate": 1.6783419471140665e-05,
"loss": 0.6586,
"step": 573
},
{
"epoch": 0.57,
"learning_rate": 1.6771567289084122e-05,
"loss": 0.64,
"step": 574
},
{
"epoch": 0.57,
"learning_rate": 1.6759697513003926e-05,
"loss": 0.6282,
"step": 575
},
{
"epoch": 0.57,
"learning_rate": 1.674781017374037e-05,
"loss": 0.5877,
"step": 576
},
{
"epoch": 0.57,
"learning_rate": 1.6735905302179377e-05,
"loss": 0.6247,
"step": 577
},
{
"epoch": 0.57,
"learning_rate": 1.6723982929252428e-05,
"loss": 0.6917,
"step": 578
},
{
"epoch": 0.58,
"learning_rate": 1.6712043085936473e-05,
"loss": 0.6647,
"step": 579
},
{
"epoch": 0.58,
"learning_rate": 1.6700085803253858e-05,
"loss": 0.6931,
"step": 580
},
{
"epoch": 0.58,
"learning_rate": 1.668811111227224e-05,
"loss": 0.6597,
"step": 581
},
{
"epoch": 0.58,
"learning_rate": 1.6676119044104494e-05,
"loss": 0.627,
"step": 582
},
{
"epoch": 0.58,
"learning_rate": 1.666410962990867e-05,
"loss": 0.5954,
"step": 583
},
{
"epoch": 0.58,
"learning_rate": 1.6652082900887858e-05,
"loss": 0.6516,
"step": 584
},
{
"epoch": 0.58,
"learning_rate": 1.664003888829016e-05,
"loss": 0.6377,
"step": 585
},
{
"epoch": 0.58,
"learning_rate": 1.6627977623408566e-05,
"loss": 0.6542,
"step": 586
},
{
"epoch": 0.58,
"learning_rate": 1.6615899137580904e-05,
"loss": 0.6716,
"step": 587
},
{
"epoch": 0.58,
"learning_rate": 1.660380346218974e-05,
"loss": 0.606,
"step": 588
},
{
"epoch": 0.59,
"learning_rate": 1.6591690628662305e-05,
"loss": 0.616,
"step": 589
},
{
"epoch": 0.59,
"learning_rate": 1.657956066847041e-05,
"loss": 0.6054,
"step": 590
},
{
"epoch": 0.59,
"learning_rate": 1.6567413613130354e-05,
"loss": 0.612,
"step": 591
},
{
"epoch": 0.59,
"learning_rate": 1.6555249494202875e-05,
"loss": 0.654,
"step": 592
},
{
"epoch": 0.59,
"learning_rate": 1.654306834329303e-05,
"loss": 0.7106,
"step": 593
},
{
"epoch": 0.59,
"learning_rate": 1.6530870192050134e-05,
"loss": 0.6601,
"step": 594
},
{
"epoch": 0.59,
"learning_rate": 1.6518655072167666e-05,
"loss": 0.6028,
"step": 595
},
{
"epoch": 0.59,
"learning_rate": 1.65064230153832e-05,
"loss": 0.6381,
"step": 596
},
{
"epoch": 0.59,
"learning_rate": 1.649417405347832e-05,
"loss": 0.6052,
"step": 597
},
{
"epoch": 0.59,
"learning_rate": 1.6481908218278524e-05,
"loss": 0.6177,
"step": 598
},
{
"epoch": 0.6,
"learning_rate": 1.6469625541653152e-05,
"loss": 0.668,
"step": 599
},
{
"epoch": 0.6,
"learning_rate": 1.6457326055515305e-05,
"loss": 0.6135,
"step": 600
},
{
"epoch": 0.6,
"learning_rate": 1.644500979182176e-05,
"loss": 0.6258,
"step": 601
},
{
"epoch": 0.6,
"learning_rate": 1.6432676782572883e-05,
"loss": 0.6496,
"step": 602
},
{
"epoch": 0.6,
"learning_rate": 1.6420327059812547e-05,
"loss": 0.5972,
"step": 603
},
{
"epoch": 0.6,
"learning_rate": 1.6407960655628055e-05,
"loss": 0.6349,
"step": 604
},
{
"epoch": 0.6,
"learning_rate": 1.6395577602150043e-05,
"loss": 0.6827,
"step": 605
},
{
"epoch": 0.6,
"learning_rate": 1.6383177931552422e-05,
"loss": 0.6728,
"step": 606
},
{
"epoch": 0.6,
"learning_rate": 1.637076167605226e-05,
"loss": 0.7084,
"step": 607
},
{
"epoch": 0.6,
"learning_rate": 1.635832886790973e-05,
"loss": 0.7076,
"step": 608
},
{
"epoch": 0.61,
"learning_rate": 1.6345879539428e-05,
"loss": 0.597,
"step": 609
},
{
"epoch": 0.61,
"learning_rate": 1.6333413722953175e-05,
"loss": 0.6083,
"step": 610
},
{
"epoch": 0.61,
"learning_rate": 1.632093145087419e-05,
"loss": 0.5898,
"step": 611
},
{
"epoch": 0.61,
"learning_rate": 1.630843275562274e-05,
"loss": 0.622,
"step": 612
},
{
"epoch": 0.61,
"learning_rate": 1.629591766967318e-05,
"loss": 0.5837,
"step": 613
},
{
"epoch": 0.61,
"learning_rate": 1.6283386225542467e-05,
"loss": 0.6169,
"step": 614
},
{
"epoch": 0.61,
"learning_rate": 1.6270838455790056e-05,
"loss": 0.6189,
"step": 615
},
{
"epoch": 0.61,
"learning_rate": 1.6258274393017814e-05,
"loss": 0.6919,
"step": 616
},
{
"epoch": 0.61,
"learning_rate": 1.6245694069869945e-05,
"loss": 0.6176,
"step": 617
},
{
"epoch": 0.61,
"learning_rate": 1.62330975190329e-05,
"loss": 0.6348,
"step": 618
},
{
"epoch": 0.62,
"learning_rate": 1.622048477323529e-05,
"loss": 0.6459,
"step": 619
},
{
"epoch": 0.62,
"learning_rate": 1.620785586524781e-05,
"loss": 0.6236,
"step": 620
},
{
"epoch": 0.62,
"learning_rate": 1.6195210827883146e-05,
"loss": 0.6287,
"step": 621
},
{
"epoch": 0.62,
"learning_rate": 1.6182549693995893e-05,
"loss": 0.6218,
"step": 622
},
{
"epoch": 0.62,
"learning_rate": 1.6169872496482462e-05,
"loss": 0.6691,
"step": 623
},
{
"epoch": 0.62,
"learning_rate": 1.6157179268281007e-05,
"loss": 0.653,
"step": 624
},
{
"epoch": 0.62,
"learning_rate": 1.614447004237133e-05,
"loss": 0.6649,
"step": 625
},
{
"epoch": 0.62,
"learning_rate": 1.61317448517748e-05,
"loss": 0.6055,
"step": 626
},
{
"epoch": 0.62,
"learning_rate": 1.611900372955427e-05,
"loss": 0.7133,
"step": 627
},
{
"epoch": 0.62,
"learning_rate": 1.6106246708813973e-05,
"loss": 0.661,
"step": 628
},
{
"epoch": 0.63,
"learning_rate": 1.6093473822699467e-05,
"loss": 0.6101,
"step": 629
},
{
"epoch": 0.63,
"learning_rate": 1.608068510439752e-05,
"loss": 0.6209,
"step": 630
},
{
"epoch": 0.63,
"learning_rate": 1.6067880587136047e-05,
"loss": 0.6485,
"step": 631
},
{
"epoch": 0.63,
"learning_rate": 1.6055060304184e-05,
"loss": 0.6972,
"step": 632
},
{
"epoch": 0.63,
"learning_rate": 1.6042224288851292e-05,
"loss": 0.6392,
"step": 633
},
{
"epoch": 0.63,
"learning_rate": 1.6029372574488732e-05,
"loss": 0.5941,
"step": 634
},
{
"epoch": 0.63,
"learning_rate": 1.6016505194487894e-05,
"loss": 0.6168,
"step": 635
},
{
"epoch": 0.63,
"learning_rate": 1.600362218228107e-05,
"loss": 0.6962,
"step": 636
},
{
"epoch": 0.63,
"learning_rate": 1.5990723571341167e-05,
"loss": 0.6139,
"step": 637
},
{
"epoch": 0.63,
"learning_rate": 1.5977809395181615e-05,
"loss": 0.6416,
"step": 638
},
{
"epoch": 0.64,
"learning_rate": 1.5964879687356286e-05,
"loss": 0.5989,
"step": 639
},
{
"epoch": 0.64,
"learning_rate": 1.595193448145941e-05,
"loss": 0.6464,
"step": 640
},
{
"epoch": 0.64,
"learning_rate": 1.5938973811125493e-05,
"loss": 0.6293,
"step": 641
},
{
"epoch": 0.64,
"learning_rate": 1.59259977100292e-05,
"loss": 0.6367,
"step": 642
},
{
"epoch": 0.64,
"learning_rate": 1.591300621188531e-05,
"loss": 0.6843,
"step": 643
},
{
"epoch": 0.64,
"learning_rate": 1.589999935044859e-05,
"loss": 0.6632,
"step": 644
},
{
"epoch": 0.64,
"learning_rate": 1.5886977159513732e-05,
"loss": 0.6747,
"step": 645
},
{
"epoch": 0.64,
"learning_rate": 1.5873939672915263e-05,
"loss": 0.6435,
"step": 646
},
{
"epoch": 0.64,
"learning_rate": 1.586088692452744e-05,
"loss": 0.6339,
"step": 647
},
{
"epoch": 0.64,
"learning_rate": 1.5847818948264183e-05,
"loss": 0.6781,
"step": 648
},
{
"epoch": 0.65,
"learning_rate": 1.5834735778078968e-05,
"loss": 0.6314,
"step": 649
},
{
"epoch": 0.65,
"learning_rate": 1.5821637447964757e-05,
"loss": 0.6327,
"step": 650
},
{
"epoch": 0.65,
"learning_rate": 1.58085239919539e-05,
"loss": 0.5932,
"step": 651
},
{
"epoch": 0.65,
"learning_rate": 1.5795395444118037e-05,
"loss": 0.6264,
"step": 652
},
{
"epoch": 0.65,
"learning_rate": 1.5782251838568034e-05,
"loss": 0.574,
"step": 653
},
{
"epoch": 0.65,
"learning_rate": 1.5769093209453876e-05,
"loss": 0.6047,
"step": 654
},
{
"epoch": 0.65,
"learning_rate": 1.5755919590964572e-05,
"loss": 0.6573,
"step": 655
},
{
"epoch": 0.65,
"learning_rate": 1.5742731017328087e-05,
"loss": 0.6067,
"step": 656
},
{
"epoch": 0.65,
"learning_rate": 1.572952752281125e-05,
"loss": 0.7343,
"step": 657
},
{
"epoch": 0.65,
"learning_rate": 1.571630914171964e-05,
"loss": 0.598,
"step": 658
},
{
"epoch": 0.66,
"learning_rate": 1.5703075908397523e-05,
"loss": 0.6209,
"step": 659
},
{
"epoch": 0.66,
"learning_rate": 1.5689827857227755e-05,
"loss": 0.6603,
"step": 660
},
{
"epoch": 0.66,
"learning_rate": 1.5676565022631696e-05,
"loss": 0.6373,
"step": 661
},
{
"epoch": 0.66,
"learning_rate": 1.5663287439069114e-05,
"loss": 0.6221,
"step": 662
},
{
"epoch": 0.66,
"learning_rate": 1.5649995141038088e-05,
"loss": 0.6442,
"step": 663
},
{
"epoch": 0.66,
"learning_rate": 1.563668816307494e-05,
"loss": 0.6095,
"step": 664
},
{
"epoch": 0.66,
"learning_rate": 1.562336653975413e-05,
"loss": 0.625,
"step": 665
},
{
"epoch": 0.66,
"learning_rate": 1.5610030305688173e-05,
"loss": 0.6606,
"step": 666
},
{
"epoch": 0.66,
"learning_rate": 1.559667949552754e-05,
"loss": 0.684,
"step": 667
},
{
"epoch": 0.66,
"learning_rate": 1.558331414396058e-05,
"loss": 0.6011,
"step": 668
},
{
"epoch": 0.67,
"learning_rate": 1.556993428571342e-05,
"loss": 0.6515,
"step": 669
},
{
"epoch": 0.67,
"learning_rate": 1.5556539955549878e-05,
"loss": 0.6045,
"step": 670
},
{
"epoch": 0.67,
"learning_rate": 1.5543131188271374e-05,
"loss": 0.6265,
"step": 671
},
{
"epoch": 0.67,
"learning_rate": 1.5529708018716846e-05,
"loss": 0.6261,
"step": 672
},
{
"epoch": 0.67,
"learning_rate": 1.5516270481762634e-05,
"loss": 0.6576,
"step": 673
},
{
"epoch": 0.67,
"learning_rate": 1.550281861232243e-05,
"loss": 0.6596,
"step": 674
},
{
"epoch": 0.67,
"learning_rate": 1.548935244534715e-05,
"loss": 0.6664,
"step": 675
},
{
"epoch": 0.67,
"learning_rate": 1.547587201582486e-05,
"loss": 0.6487,
"step": 676
},
{
"epoch": 0.67,
"learning_rate": 1.5462377358780693e-05,
"loss": 0.5812,
"step": 677
},
{
"epoch": 0.67,
"learning_rate": 1.5448868509276736e-05,
"loss": 0.6703,
"step": 678
},
{
"epoch": 0.68,
"learning_rate": 1.5435345502411956e-05,
"loss": 0.6343,
"step": 679
},
{
"epoch": 0.68,
"learning_rate": 1.5421808373322105e-05,
"loss": 0.6391,
"step": 680
},
{
"epoch": 0.68,
"learning_rate": 1.5408257157179627e-05,
"loss": 0.6565,
"step": 681
},
{
"epoch": 0.68,
"learning_rate": 1.5394691889193564e-05,
"loss": 0.6323,
"step": 682
},
{
"epoch": 0.68,
"learning_rate": 1.538111260460948e-05,
"loss": 0.6661,
"step": 683
},
{
"epoch": 0.68,
"learning_rate": 1.536751933870934e-05,
"loss": 0.654,
"step": 684
},
{
"epoch": 0.68,
"learning_rate": 1.5353912126811435e-05,
"loss": 0.6332,
"step": 685
},
{
"epoch": 0.68,
"learning_rate": 1.534029100427032e-05,
"loss": 0.6235,
"step": 686
},
{
"epoch": 0.68,
"learning_rate": 1.5326656006476658e-05,
"loss": 0.5774,
"step": 687
},
{
"epoch": 0.68,
"learning_rate": 1.531300716885718e-05,
"loss": 0.6491,
"step": 688
},
{
"epoch": 0.69,
"learning_rate": 1.5299344526874576e-05,
"loss": 0.6699,
"step": 689
},
{
"epoch": 0.69,
"learning_rate": 1.5285668116027397e-05,
"loss": 0.645,
"step": 690
},
{
"epoch": 0.69,
"learning_rate": 1.5271977971849973e-05,
"loss": 0.6511,
"step": 691
},
{
"epoch": 0.69,
"learning_rate": 1.5258274129912311e-05,
"loss": 0.6154,
"step": 692
},
{
"epoch": 0.69,
"learning_rate": 1.5244556625820015e-05,
"loss": 0.5958,
"step": 693
},
{
"epoch": 0.69,
"learning_rate": 1.5230825495214184e-05,
"loss": 0.6602,
"step": 694
},
{
"epoch": 0.69,
"learning_rate": 1.5217080773771315e-05,
"loss": 0.6737,
"step": 695
},
{
"epoch": 0.69,
"learning_rate": 1.5203322497203228e-05,
"loss": 0.647,
"step": 696
},
{
"epoch": 0.69,
"learning_rate": 1.5189550701256955e-05,
"loss": 0.6099,
"step": 697
},
{
"epoch": 0.69,
"learning_rate": 1.5175765421714652e-05,
"loss": 0.622,
"step": 698
},
{
"epoch": 0.7,
"learning_rate": 1.5161966694393516e-05,
"loss": 0.5852,
"step": 699
},
{
"epoch": 0.7,
"learning_rate": 1.5148154555145684e-05,
"loss": 0.654,
"step": 700
},
{
"epoch": 0.7,
"learning_rate": 1.513432903985813e-05,
"loss": 0.6009,
"step": 701
},
{
"epoch": 0.7,
"learning_rate": 1.512049018445259e-05,
"loss": 0.6546,
"step": 702
},
{
"epoch": 0.7,
"learning_rate": 1.5106638024885465e-05,
"loss": 0.6528,
"step": 703
},
{
"epoch": 0.7,
"learning_rate": 1.5092772597147707e-05,
"loss": 0.597,
"step": 704
},
{
"epoch": 0.7,
"learning_rate": 1.507889393726476e-05,
"loss": 0.5966,
"step": 705
},
{
"epoch": 0.7,
"learning_rate": 1.5065002081296443e-05,
"loss": 0.6459,
"step": 706
},
{
"epoch": 0.7,
"learning_rate": 1.5051097065336846e-05,
"loss": 0.6364,
"step": 707
},
{
"epoch": 0.7,
"learning_rate": 1.503717892551427e-05,
"loss": 0.7106,
"step": 708
},
{
"epoch": 0.71,
"learning_rate": 1.5023247697991114e-05,
"loss": 0.6433,
"step": 709
},
{
"epoch": 0.71,
"learning_rate": 1.5009303418963772e-05,
"loss": 0.7182,
"step": 710
},
{
"epoch": 0.71,
"learning_rate": 1.4995346124662551e-05,
"loss": 0.5917,
"step": 711
},
{
"epoch": 0.71,
"learning_rate": 1.4981375851351579e-05,
"loss": 0.6753,
"step": 712
},
{
"epoch": 0.71,
"learning_rate": 1.4967392635328702e-05,
"loss": 0.6453,
"step": 713
},
{
"epoch": 0.71,
"learning_rate": 1.4953396512925398e-05,
"loss": 0.6367,
"step": 714
},
{
"epoch": 0.71,
"learning_rate": 1.4939387520506675e-05,
"loss": 0.6132,
"step": 715
},
{
"epoch": 0.71,
"learning_rate": 1.492536569447098e-05,
"loss": 0.6269,
"step": 716
},
{
"epoch": 0.71,
"learning_rate": 1.4911331071250113e-05,
"loss": 0.6404,
"step": 717
},
{
"epoch": 0.71,
"learning_rate": 1.4897283687309107e-05,
"loss": 0.6296,
"step": 718
},
{
"epoch": 0.72,
"learning_rate": 1.4883223579146167e-05,
"loss": 0.6437,
"step": 719
},
{
"epoch": 0.72,
"learning_rate": 1.4869150783292552e-05,
"loss": 0.6409,
"step": 720
},
{
"epoch": 0.72,
"learning_rate": 1.4855065336312482e-05,
"loss": 0.5544,
"step": 721
},
{
"epoch": 0.72,
"learning_rate": 1.4840967274803055e-05,
"loss": 0.6783,
"step": 722
},
{
"epoch": 0.72,
"learning_rate": 1.482685663539414e-05,
"loss": 0.6072,
"step": 723
},
{
"epoch": 0.72,
"learning_rate": 1.4812733454748283e-05,
"loss": 0.6093,
"step": 724
},
{
"epoch": 0.72,
"learning_rate": 1.4798597769560623e-05,
"loss": 0.5691,
"step": 725
},
{
"epoch": 0.72,
"learning_rate": 1.4784449616558785e-05,
"loss": 0.6241,
"step": 726
},
{
"epoch": 0.72,
"learning_rate": 1.4770289032502785e-05,
"loss": 0.6191,
"step": 727
},
{
"epoch": 0.72,
"learning_rate": 1.475611605418494e-05,
"loss": 0.8272,
"step": 728
},
{
"epoch": 0.73,
"learning_rate": 1.4741930718429772e-05,
"loss": 0.5563,
"step": 729
},
{
"epoch": 0.73,
"learning_rate": 1.4727733062093905e-05,
"loss": 0.6515,
"step": 730
},
{
"epoch": 0.73,
"learning_rate": 1.4713523122065981e-05,
"loss": 0.6024,
"step": 731
},
{
"epoch": 0.73,
"learning_rate": 1.4699300935266557e-05,
"loss": 0.6459,
"step": 732
},
{
"epoch": 0.73,
"learning_rate": 1.4685066538648e-05,
"loss": 0.6279,
"step": 733
},
{
"epoch": 0.73,
"learning_rate": 1.4670819969194416e-05,
"loss": 0.637,
"step": 734
},
{
"epoch": 0.73,
"learning_rate": 1.4656561263921529e-05,
"loss": 0.6378,
"step": 735
},
{
"epoch": 0.73,
"learning_rate": 1.464229045987659e-05,
"loss": 0.5813,
"step": 736
},
{
"epoch": 0.73,
"learning_rate": 1.4628007594138307e-05,
"loss": 0.6014,
"step": 737
},
{
"epoch": 0.73,
"learning_rate": 1.4613712703816696e-05,
"loss": 0.5909,
"step": 738
},
{
"epoch": 0.73,
"learning_rate": 1.4599405826053039e-05,
"loss": 0.6687,
"step": 739
},
{
"epoch": 0.74,
"learning_rate": 1.4585086998019757e-05,
"loss": 0.6203,
"step": 740
},
{
"epoch": 0.74,
"learning_rate": 1.4570756256920318e-05,
"loss": 0.5934,
"step": 741
},
{
"epoch": 0.74,
"learning_rate": 1.4556413639989142e-05,
"loss": 0.6561,
"step": 742
},
{
"epoch": 0.74,
"learning_rate": 1.4542059184491513e-05,
"loss": 0.59,
"step": 743
},
{
"epoch": 0.74,
"learning_rate": 1.4527692927723465e-05,
"loss": 0.69,
"step": 744
},
{
"epoch": 0.74,
"learning_rate": 1.4513314907011698e-05,
"loss": 0.6064,
"step": 745
},
{
"epoch": 0.74,
"learning_rate": 1.449892515971348e-05,
"loss": 0.6644,
"step": 746
},
{
"epoch": 0.74,
"learning_rate": 1.4484523723216542e-05,
"loss": 0.6059,
"step": 747
},
{
"epoch": 0.74,
"learning_rate": 1.447011063493899e-05,
"loss": 0.6174,
"step": 748
},
{
"epoch": 0.74,
"learning_rate": 1.4455685932329204e-05,
"loss": 0.636,
"step": 749
},
{
"epoch": 0.75,
"learning_rate": 1.4441249652865737e-05,
"loss": 0.5573,
"step": 750
},
{
"epoch": 0.75,
"learning_rate": 1.4426801834057224e-05,
"loss": 0.6531,
"step": 751
},
{
"epoch": 0.75,
"learning_rate": 1.4412342513442283e-05,
"loss": 0.5889,
"step": 752
},
{
"epoch": 0.75,
"learning_rate": 1.439787172858941e-05,
"loss": 0.606,
"step": 753
},
{
"epoch": 0.75,
"learning_rate": 1.4383389517096899e-05,
"loss": 0.6039,
"step": 754
},
{
"epoch": 0.75,
"learning_rate": 1.4368895916592723e-05,
"loss": 0.619,
"step": 755
},
{
"epoch": 0.75,
"learning_rate": 1.4354390964734444e-05,
"loss": 0.6378,
"step": 756
},
{
"epoch": 0.75,
"learning_rate": 1.433987469920913e-05,
"loss": 0.5943,
"step": 757
},
{
"epoch": 0.75,
"learning_rate": 1.4325347157733232e-05,
"loss": 0.6004,
"step": 758
},
{
"epoch": 0.75,
"learning_rate": 1.4310808378052506e-05,
"loss": 0.6814,
"step": 759
},
{
"epoch": 0.76,
"learning_rate": 1.4296258397941905e-05,
"loss": 0.6315,
"step": 760
},
{
"epoch": 0.76,
"learning_rate": 1.4281697255205478e-05,
"loss": 0.6236,
"step": 761
},
{
"epoch": 0.76,
"learning_rate": 1.4267124987676288e-05,
"loss": 0.6036,
"step": 762
},
{
"epoch": 0.76,
"learning_rate": 1.4252541633216295e-05,
"loss": 0.6046,
"step": 763
},
{
"epoch": 0.76,
"learning_rate": 1.4237947229716262e-05,
"loss": 0.5763,
"step": 764
},
{
"epoch": 0.76,
"learning_rate": 1.4223341815095672e-05,
"loss": 0.5621,
"step": 765
},
{
"epoch": 0.76,
"learning_rate": 1.4208725427302606e-05,
"loss": 0.5938,
"step": 766
},
{
"epoch": 0.76,
"learning_rate": 1.4194098104313656e-05,
"loss": 0.6015,
"step": 767
},
{
"epoch": 0.76,
"learning_rate": 1.4179459884133836e-05,
"loss": 0.6045,
"step": 768
},
{
"epoch": 0.76,
"learning_rate": 1.4164810804796464e-05,
"loss": 0.6367,
"step": 769
},
{
"epoch": 0.77,
"learning_rate": 1.4150150904363072e-05,
"loss": 0.6264,
"step": 770
},
{
"epoch": 0.77,
"learning_rate": 1.413548022092332e-05,
"loss": 0.6353,
"step": 771
},
{
"epoch": 0.77,
"learning_rate": 1.4120798792594868e-05,
"loss": 0.5893,
"step": 772
},
{
"epoch": 0.77,
"learning_rate": 1.4106106657523301e-05,
"loss": 0.6197,
"step": 773
},
{
"epoch": 0.77,
"learning_rate": 1.409140385388203e-05,
"loss": 0.5622,
"step": 774
},
{
"epoch": 0.77,
"learning_rate": 1.407669041987217e-05,
"loss": 0.6142,
"step": 775
},
{
"epoch": 0.77,
"learning_rate": 1.4061966393722468e-05,
"loss": 0.6106,
"step": 776
},
{
"epoch": 0.77,
"learning_rate": 1.4047231813689193e-05,
"loss": 0.6018,
"step": 777
},
{
"epoch": 0.77,
"learning_rate": 1.4032486718056016e-05,
"loss": 0.596,
"step": 778
},
{
"epoch": 0.77,
"learning_rate": 1.4017731145133955e-05,
"loss": 0.6388,
"step": 779
},
{
"epoch": 0.78,
"learning_rate": 1.4002965133261238e-05,
"loss": 0.6808,
"step": 780
},
{
"epoch": 0.78,
"learning_rate": 1.3988188720803213e-05,
"loss": 0.6211,
"step": 781
},
{
"epoch": 0.78,
"learning_rate": 1.397340194615226e-05,
"loss": 0.6149,
"step": 782
},
{
"epoch": 0.78,
"learning_rate": 1.3958604847727673e-05,
"loss": 0.5746,
"step": 783
},
{
"epoch": 0.78,
"learning_rate": 1.3943797463975575e-05,
"loss": 0.6059,
"step": 784
},
{
"epoch": 0.78,
"learning_rate": 1.3928979833368813e-05,
"loss": 0.6003,
"step": 785
},
{
"epoch": 0.78,
"learning_rate": 1.3914151994406852e-05,
"loss": 0.6174,
"step": 786
},
{
"epoch": 0.78,
"learning_rate": 1.3899313985615687e-05,
"loss": 0.6488,
"step": 787
},
{
"epoch": 0.78,
"learning_rate": 1.3884465845547734e-05,
"loss": 0.6069,
"step": 788
},
{
"epoch": 0.78,
"learning_rate": 1.3869607612781733e-05,
"loss": 0.6358,
"step": 789
},
{
"epoch": 0.79,
"learning_rate": 1.385473932592264e-05,
"loss": 0.6356,
"step": 790
},
{
"epoch": 0.79,
"learning_rate": 1.383986102360155e-05,
"loss": 0.5956,
"step": 791
},
{
"epoch": 0.79,
"learning_rate": 1.3824972744475557e-05,
"loss": 0.6346,
"step": 792
},
{
"epoch": 0.79,
"learning_rate": 1.3810074527227703e-05,
"loss": 0.6317,
"step": 793
},
{
"epoch": 0.79,
"learning_rate": 1.3795166410566834e-05,
"loss": 0.6343,
"step": 794
},
{
"epoch": 0.79,
"learning_rate": 1.3780248433227517e-05,
"loss": 0.5908,
"step": 795
},
{
"epoch": 0.79,
"learning_rate": 1.376532063396995e-05,
"loss": 0.6344,
"step": 796
},
{
"epoch": 0.79,
"learning_rate": 1.3750383051579842e-05,
"loss": 0.5779,
"step": 797
},
{
"epoch": 0.79,
"learning_rate": 1.3735435724868323e-05,
"loss": 0.6194,
"step": 798
},
{
"epoch": 0.79,
"learning_rate": 1.372047869267184e-05,
"loss": 0.6273,
"step": 799
},
{
"epoch": 0.8,
"learning_rate": 1.370551199385206e-05,
"loss": 0.6385,
"step": 800
},
{
"epoch": 0.8,
"learning_rate": 1.3690535667295759e-05,
"loss": 0.6242,
"step": 801
},
{
"epoch": 0.8,
"learning_rate": 1.3675549751914736e-05,
"loss": 0.5952,
"step": 802
},
{
"epoch": 0.8,
"learning_rate": 1.3660554286645704e-05,
"loss": 0.6523,
"step": 803
},
{
"epoch": 0.8,
"learning_rate": 1.364554931045018e-05,
"loss": 0.7132,
"step": 804
},
{
"epoch": 0.8,
"learning_rate": 1.36305348623144e-05,
"loss": 0.6442,
"step": 805
},
{
"epoch": 0.8,
"learning_rate": 1.361551098124921e-05,
"loss": 0.616,
"step": 806
},
{
"epoch": 0.8,
"learning_rate": 1.3600477706289956e-05,
"loss": 0.582,
"step": 807
},
{
"epoch": 0.8,
"learning_rate": 1.3585435076496407e-05,
"loss": 0.6146,
"step": 808
},
{
"epoch": 0.8,
"learning_rate": 1.3570383130952627e-05,
"loss": 0.6244,
"step": 809
},
{
"epoch": 0.81,
"learning_rate": 1.3555321908766882e-05,
"loss": 0.6546,
"step": 810
},
{
"epoch": 0.81,
"learning_rate": 1.3540251449071552e-05,
"loss": 0.6114,
"step": 811
},
{
"epoch": 0.81,
"learning_rate": 1.3525171791023002e-05,
"loss": 0.6234,
"step": 812
},
{
"epoch": 0.81,
"learning_rate": 1.3510082973801515e-05,
"loss": 0.6471,
"step": 813
},
{
"epoch": 0.81,
"learning_rate": 1.349498503661116e-05,
"loss": 0.6366,
"step": 814
},
{
"epoch": 0.81,
"learning_rate": 1.3479878018679702e-05,
"loss": 0.619,
"step": 815
},
{
"epoch": 0.81,
"learning_rate": 1.3464761959258506e-05,
"loss": 0.5983,
"step": 816
},
{
"epoch": 0.81,
"learning_rate": 1.3449636897622416e-05,
"loss": 0.6182,
"step": 817
},
{
"epoch": 0.81,
"learning_rate": 1.3434502873069683e-05,
"loss": 0.6611,
"step": 818
},
{
"epoch": 0.81,
"learning_rate": 1.3419359924921833e-05,
"loss": 0.612,
"step": 819
},
{
"epoch": 0.82,
"learning_rate": 1.3404208092523584e-05,
"loss": 0.6394,
"step": 820
},
{
"epoch": 0.82,
"learning_rate": 1.338904741524273e-05,
"loss": 0.6003,
"step": 821
},
{
"epoch": 0.82,
"learning_rate": 1.3373877932470057e-05,
"loss": 0.6585,
"step": 822
},
{
"epoch": 0.82,
"learning_rate": 1.3358699683619216e-05,
"loss": 0.6685,
"step": 823
},
{
"epoch": 0.82,
"learning_rate": 1.3343512708126642e-05,
"loss": 0.6073,
"step": 824
},
{
"epoch": 0.82,
"learning_rate": 1.3328317045451455e-05,
"loss": 0.6581,
"step": 825
},
{
"epoch": 0.82,
"learning_rate": 1.331311273507532e-05,
"loss": 0.6696,
"step": 826
},
{
"epoch": 0.82,
"learning_rate": 1.3297899816502391e-05,
"loss": 0.6221,
"step": 827
},
{
"epoch": 0.82,
"learning_rate": 1.3282678329259185e-05,
"loss": 0.6457,
"step": 828
},
{
"epoch": 0.82,
"learning_rate": 1.326744831289447e-05,
"loss": 0.6109,
"step": 829
},
{
"epoch": 0.83,
"learning_rate": 1.3252209806979193e-05,
"loss": 0.5569,
"step": 830
},
{
"epoch": 0.83,
"learning_rate": 1.3236962851106346e-05,
"loss": 0.6697,
"step": 831
},
{
"epoch": 0.83,
"learning_rate": 1.322170748489088e-05,
"loss": 0.6458,
"step": 832
},
{
"epoch": 0.83,
"learning_rate": 1.32064437479696e-05,
"loss": 0.6235,
"step": 833
},
{
"epoch": 0.83,
"learning_rate": 1.3191171680001048e-05,
"loss": 0.5603,
"step": 834
},
{
"epoch": 0.83,
"learning_rate": 1.3175891320665431e-05,
"loss": 0.6584,
"step": 835
},
{
"epoch": 0.83,
"learning_rate": 1.3160602709664482e-05,
"loss": 0.5955,
"step": 836
},
{
"epoch": 0.83,
"learning_rate": 1.3145305886721389e-05,
"loss": 0.637,
"step": 837
},
{
"epoch": 0.83,
"learning_rate": 1.3130000891580661e-05,
"loss": 0.607,
"step": 838
},
{
"epoch": 0.83,
"learning_rate": 1.3114687764008048e-05,
"loss": 0.6458,
"step": 839
},
{
"epoch": 0.84,
"learning_rate": 1.309936654379043e-05,
"loss": 0.6418,
"step": 840
},
{
"epoch": 0.84,
"learning_rate": 1.3084037270735714e-05,
"loss": 0.5792,
"step": 841
},
{
"epoch": 0.84,
"learning_rate": 1.3068699984672728e-05,
"loss": 0.57,
"step": 842
},
{
"epoch": 0.84,
"learning_rate": 1.3053354725451119e-05,
"loss": 0.5929,
"step": 843
},
{
"epoch": 0.84,
"learning_rate": 1.3038001532941249e-05,
"loss": 0.5656,
"step": 844
},
{
"epoch": 0.84,
"learning_rate": 1.3022640447034104e-05,
"loss": 0.5648,
"step": 845
},
{
"epoch": 0.84,
"learning_rate": 1.3007271507641156e-05,
"loss": 0.5659,
"step": 846
},
{
"epoch": 0.84,
"learning_rate": 1.2991894754694303e-05,
"loss": 0.5866,
"step": 847
},
{
"epoch": 0.84,
"learning_rate": 1.297651022814574e-05,
"loss": 0.5759,
"step": 848
},
{
"epoch": 0.84,
"learning_rate": 1.2961117967967844e-05,
"loss": 0.6466,
"step": 849
},
{
"epoch": 0.85,
"learning_rate": 1.2945718014153116e-05,
"loss": 0.61,
"step": 850
},
{
"epoch": 0.85,
"learning_rate": 1.2930310406714011e-05,
"loss": 0.6612,
"step": 851
},
{
"epoch": 0.85,
"learning_rate": 1.2914895185682899e-05,
"loss": 0.6192,
"step": 852
},
{
"epoch": 0.85,
"learning_rate": 1.2899472391111916e-05,
"loss": 0.6024,
"step": 853
},
{
"epoch": 0.85,
"learning_rate": 1.2884042063072881e-05,
"loss": 0.6221,
"step": 854
},
{
"epoch": 0.85,
"learning_rate": 1.2868604241657187e-05,
"loss": 0.6554,
"step": 855
},
{
"epoch": 0.85,
"learning_rate": 1.2853158966975693e-05,
"loss": 0.6397,
"step": 856
},
{
"epoch": 0.85,
"learning_rate": 1.2837706279158626e-05,
"loss": 0.6314,
"step": 857
},
{
"epoch": 0.85,
"learning_rate": 1.2822246218355475e-05,
"loss": 0.6293,
"step": 858
},
{
"epoch": 0.85,
"learning_rate": 1.280677882473488e-05,
"loss": 0.6527,
"step": 859
},
{
"epoch": 0.86,
"learning_rate": 1.2791304138484539e-05,
"loss": 0.6089,
"step": 860
},
{
"epoch": 0.86,
"learning_rate": 1.2775822199811097e-05,
"loss": 0.6274,
"step": 861
},
{
"epoch": 0.86,
"learning_rate": 1.2760333048940043e-05,
"loss": 0.5853,
"step": 862
},
{
"epoch": 0.86,
"learning_rate": 1.27448367261156e-05,
"loss": 0.6028,
"step": 863
},
{
"epoch": 0.86,
"learning_rate": 1.272933327160063e-05,
"loss": 0.6786,
"step": 864
},
{
"epoch": 0.86,
"learning_rate": 1.2713822725676526e-05,
"loss": 0.6135,
"step": 865
},
{
"epoch": 0.86,
"learning_rate": 1.2698305128643099e-05,
"loss": 0.6188,
"step": 866
},
{
"epoch": 0.86,
"learning_rate": 1.2682780520818488e-05,
"loss": 0.6596,
"step": 867
},
{
"epoch": 0.86,
"learning_rate": 1.266724894253904e-05,
"loss": 0.6224,
"step": 868
},
{
"epoch": 0.86,
"learning_rate": 1.2651710434159223e-05,
"loss": 0.6583,
"step": 869
},
{
"epoch": 0.87,
"learning_rate": 1.2636165036051505e-05,
"loss": 0.6162,
"step": 870
},
{
"epoch": 0.87,
"learning_rate": 1.2620612788606246e-05,
"loss": 0.658,
"step": 871
},
{
"epoch": 0.87,
"learning_rate": 1.260505373223162e-05,
"loss": 0.5569,
"step": 872
},
{
"epoch": 0.87,
"learning_rate": 1.2589487907353484e-05,
"loss": 0.5709,
"step": 873
},
{
"epoch": 0.87,
"learning_rate": 1.2573915354415274e-05,
"loss": 0.5685,
"step": 874
},
{
"epoch": 0.87,
"learning_rate": 1.2558336113877916e-05,
"loss": 0.6361,
"step": 875
},
{
"epoch": 0.87,
"learning_rate": 1.2542750226219711e-05,
"loss": 0.6597,
"step": 876
},
{
"epoch": 0.87,
"learning_rate": 1.2527157731936228e-05,
"loss": 0.5965,
"step": 877
},
{
"epoch": 0.87,
"learning_rate": 1.2511558671540198e-05,
"loss": 0.5527,
"step": 878
},
{
"epoch": 0.87,
"learning_rate": 1.2495953085561426e-05,
"loss": 0.6598,
"step": 879
},
{
"epoch": 0.88,
"learning_rate": 1.2480341014546653e-05,
"loss": 0.6458,
"step": 880
},
{
"epoch": 0.88,
"learning_rate": 1.2464722499059481e-05,
"loss": 0.6243,
"step": 881
},
{
"epoch": 0.88,
"learning_rate": 1.2449097579680261e-05,
"loss": 0.6272,
"step": 882
},
{
"epoch": 0.88,
"learning_rate": 1.2433466297005964e-05,
"loss": 0.6277,
"step": 883
},
{
"epoch": 0.88,
"learning_rate": 1.241782869165012e-05,
"loss": 0.6145,
"step": 884
},
{
"epoch": 0.88,
"learning_rate": 1.2402184804242658e-05,
"loss": 0.6369,
"step": 885
},
{
"epoch": 0.88,
"learning_rate": 1.2386534675429858e-05,
"loss": 0.5982,
"step": 886
},
{
"epoch": 0.88,
"learning_rate": 1.2370878345874195e-05,
"loss": 0.5905,
"step": 887
},
{
"epoch": 0.88,
"learning_rate": 1.235521585625426e-05,
"loss": 0.5931,
"step": 888
},
{
"epoch": 0.88,
"learning_rate": 1.2339547247264658e-05,
"loss": 0.593,
"step": 889
},
{
"epoch": 0.89,
"learning_rate": 1.2323872559615884e-05,
"loss": 0.6239,
"step": 890
},
{
"epoch": 0.89,
"learning_rate": 1.230819183403423e-05,
"loss": 0.5766,
"step": 891
},
{
"epoch": 0.89,
"learning_rate": 1.2292505111261674e-05,
"loss": 0.6276,
"step": 892
},
{
"epoch": 0.89,
"learning_rate": 1.227681243205578e-05,
"loss": 0.6529,
"step": 893
},
{
"epoch": 0.89,
"learning_rate": 1.2261113837189587e-05,
"loss": 0.5941,
"step": 894
},
{
"epoch": 0.89,
"learning_rate": 1.2245409367451498e-05,
"loss": 0.6162,
"step": 895
},
{
"epoch": 0.89,
"learning_rate": 1.2229699063645191e-05,
"loss": 0.6013,
"step": 896
},
{
"epoch": 0.89,
"learning_rate": 1.2213982966589492e-05,
"loss": 0.6211,
"step": 897
},
{
"epoch": 0.89,
"learning_rate": 1.2198261117118287e-05,
"loss": 0.6658,
"step": 898
},
{
"epoch": 0.89,
"learning_rate": 1.2182533556080402e-05,
"loss": 0.5758,
"step": 899
},
{
"epoch": 0.9,
"learning_rate": 1.2166800324339505e-05,
"loss": 0.6558,
"step": 900
},
{
"epoch": 0.9,
"learning_rate": 1.2151061462774006e-05,
"loss": 0.5785,
"step": 901
},
{
"epoch": 0.9,
"learning_rate": 1.213531701227692e-05,
"loss": 0.5656,
"step": 902
},
{
"epoch": 0.9,
"learning_rate": 1.2119567013755811e-05,
"loss": 0.643,
"step": 903
},
{
"epoch": 0.9,
"learning_rate": 1.2103811508132642e-05,
"loss": 0.6632,
"step": 904
},
{
"epoch": 0.9,
"learning_rate": 1.208805053634368e-05,
"loss": 0.6232,
"step": 905
},
{
"epoch": 0.9,
"learning_rate": 1.2072284139339414e-05,
"loss": 0.6222,
"step": 906
},
{
"epoch": 0.9,
"learning_rate": 1.2056512358084408e-05,
"loss": 0.6544,
"step": 907
},
{
"epoch": 0.9,
"learning_rate": 1.204073523355723e-05,
"loss": 0.6373,
"step": 908
},
{
"epoch": 0.9,
"learning_rate": 1.2024952806750321e-05,
"loss": 0.5505,
"step": 909
},
{
"epoch": 0.91,
"learning_rate": 1.2009165118669905e-05,
"loss": 0.6273,
"step": 910
},
{
"epoch": 0.91,
"learning_rate": 1.1993372210335875e-05,
"loss": 0.5647,
"step": 911
},
{
"epoch": 0.91,
"learning_rate": 1.197757412278168e-05,
"loss": 0.5965,
"step": 912
},
{
"epoch": 0.91,
"learning_rate": 1.1961770897054242e-05,
"loss": 0.5739,
"step": 913
},
{
"epoch": 0.91,
"learning_rate": 1.1945962574213814e-05,
"loss": 0.5656,
"step": 914
},
{
"epoch": 0.91,
"learning_rate": 1.1930149195333904e-05,
"loss": 0.6217,
"step": 915
},
{
"epoch": 0.91,
"learning_rate": 1.191433080150116e-05,
"loss": 0.6263,
"step": 916
},
{
"epoch": 0.91,
"learning_rate": 1.1898507433815244e-05,
"loss": 0.6491,
"step": 917
},
{
"epoch": 0.91,
"learning_rate": 1.1882679133388763e-05,
"loss": 0.6359,
"step": 918
},
{
"epoch": 0.91,
"learning_rate": 1.1866845941347118e-05,
"loss": 0.5871,
"step": 919
},
{
"epoch": 0.91,
"learning_rate": 1.1851007898828432e-05,
"loss": 0.6018,
"step": 920
},
{
"epoch": 0.92,
"learning_rate": 1.1835165046983436e-05,
"loss": 0.6192,
"step": 921
},
{
"epoch": 0.92,
"learning_rate": 1.1819317426975343e-05,
"loss": 0.6123,
"step": 922
},
{
"epoch": 0.92,
"learning_rate": 1.1803465079979762e-05,
"loss": 0.6608,
"step": 923
},
{
"epoch": 0.92,
"learning_rate": 1.1787608047184583e-05,
"loss": 0.6352,
"step": 924
},
{
"epoch": 0.92,
"learning_rate": 1.1771746369789872e-05,
"loss": 0.6021,
"step": 925
},
{
"epoch": 0.92,
"learning_rate": 1.1755880089007761e-05,
"loss": 0.6079,
"step": 926
},
{
"epoch": 0.92,
"learning_rate": 1.1740009246062343e-05,
"loss": 0.6216,
"step": 927
},
{
"epoch": 0.92,
"learning_rate": 1.1724133882189562e-05,
"loss": 0.6276,
"step": 928
},
{
"epoch": 0.92,
"learning_rate": 1.1708254038637115e-05,
"loss": 0.604,
"step": 929
},
{
"epoch": 0.92,
"learning_rate": 1.169236975666433e-05,
"loss": 0.5523,
"step": 930
},
{
"epoch": 0.93,
"learning_rate": 1.1676481077542072e-05,
"loss": 0.6034,
"step": 931
},
{
"epoch": 0.93,
"learning_rate": 1.166058804255263e-05,
"loss": 0.6243,
"step": 932
},
{
"epoch": 0.93,
"learning_rate": 1.1644690692989616e-05,
"loss": 0.5911,
"step": 933
},
{
"epoch": 0.93,
"learning_rate": 1.1628789070157836e-05,
"loss": 0.6007,
"step": 934
},
{
"epoch": 0.93,
"learning_rate": 1.1612883215373221e-05,
"loss": 0.5507,
"step": 935
},
{
"epoch": 0.93,
"learning_rate": 1.1596973169962675e-05,
"loss": 0.644,
"step": 936
},
{
"epoch": 0.93,
"learning_rate": 1.1581058975264003e-05,
"loss": 0.6084,
"step": 937
},
{
"epoch": 0.93,
"learning_rate": 1.1565140672625799e-05,
"loss": 0.6591,
"step": 938
},
{
"epoch": 0.93,
"learning_rate": 1.1549218303407305e-05,
"loss": 0.5793,
"step": 939
},
{
"epoch": 0.93,
"learning_rate": 1.1533291908978356e-05,
"loss": 0.5984,
"step": 940
},
{
"epoch": 0.94,
"learning_rate": 1.1517361530719233e-05,
"loss": 0.6161,
"step": 941
},
{
"epoch": 0.94,
"learning_rate": 1.1501427210020559e-05,
"loss": 0.5447,
"step": 942
},
{
"epoch": 0.94,
"learning_rate": 1.1485488988283217e-05,
"loss": 0.6139,
"step": 943
},
{
"epoch": 0.94,
"learning_rate": 1.1469546906918219e-05,
"loss": 0.5928,
"step": 944
},
{
"epoch": 0.94,
"learning_rate": 1.1453601007346599e-05,
"loss": 0.6124,
"step": 945
},
{
"epoch": 0.94,
"learning_rate": 1.1437651330999324e-05,
"loss": 0.6111,
"step": 946
},
{
"epoch": 0.94,
"learning_rate": 1.1421697919317161e-05,
"loss": 0.5999,
"step": 947
},
{
"epoch": 0.94,
"learning_rate": 1.1405740813750593e-05,
"loss": 0.6364,
"step": 948
},
{
"epoch": 0.94,
"learning_rate": 1.1389780055759689e-05,
"loss": 0.569,
"step": 949
},
{
"epoch": 0.94,
"learning_rate": 1.1373815686814025e-05,
"loss": 0.5304,
"step": 950
},
{
"epoch": 0.95,
"learning_rate": 1.1357847748392539e-05,
"loss": 0.6998,
"step": 951
},
{
"epoch": 0.95,
"learning_rate": 1.1341876281983457e-05,
"loss": 0.5927,
"step": 952
},
{
"epoch": 0.95,
"learning_rate": 1.1325901329084167e-05,
"loss": 0.6094,
"step": 953
},
{
"epoch": 0.95,
"learning_rate": 1.1309922931201114e-05,
"loss": 0.6459,
"step": 954
},
{
"epoch": 0.95,
"learning_rate": 1.1293941129849701e-05,
"loss": 0.6598,
"step": 955
},
{
"epoch": 0.95,
"learning_rate": 1.1277955966554165e-05,
"loss": 0.5724,
"step": 956
},
{
"epoch": 0.95,
"learning_rate": 1.126196748284748e-05,
"loss": 0.5728,
"step": 957
},
{
"epoch": 0.95,
"learning_rate": 1.1245975720271257e-05,
"loss": 0.6222,
"step": 958
},
{
"epoch": 0.95,
"learning_rate": 1.1229980720375609e-05,
"loss": 0.6267,
"step": 959
},
{
"epoch": 0.95,
"learning_rate": 1.1213982524719074e-05,
"loss": 0.5778,
"step": 960
},
{
"epoch": 0.96,
"learning_rate": 1.1197981174868488e-05,
"loss": 0.5592,
"step": 961
},
{
"epoch": 0.96,
"learning_rate": 1.1181976712398885e-05,
"loss": 0.633,
"step": 962
},
{
"epoch": 0.96,
"learning_rate": 1.1165969178893384e-05,
"loss": 0.636,
"step": 963
},
{
"epoch": 0.96,
"learning_rate": 1.114995861594308e-05,
"loss": 0.6022,
"step": 964
},
{
"epoch": 0.96,
"learning_rate": 1.1133945065146947e-05,
"loss": 0.5528,
"step": 965
},
{
"epoch": 0.96,
"learning_rate": 1.1117928568111715e-05,
"loss": 0.5927,
"step": 966
},
{
"epoch": 0.96,
"learning_rate": 1.1101909166451777e-05,
"loss": 0.5654,
"step": 967
},
{
"epoch": 0.96,
"learning_rate": 1.108588690178906e-05,
"loss": 0.5545,
"step": 968
},
{
"epoch": 0.96,
"learning_rate": 1.1069861815752944e-05,
"loss": 0.5902,
"step": 969
},
{
"epoch": 0.96,
"learning_rate": 1.105383394998013e-05,
"loss": 0.6311,
"step": 970
},
{
"epoch": 0.97,
"learning_rate": 1.1037803346114541e-05,
"loss": 0.5778,
"step": 971
},
{
"epoch": 0.97,
"learning_rate": 1.1021770045807228e-05,
"loss": 0.655,
"step": 972
},
{
"epoch": 0.97,
"learning_rate": 1.1005734090716227e-05,
"loss": 0.645,
"step": 973
},
{
"epoch": 0.97,
"learning_rate": 1.0989695522506486e-05,
"loss": 0.6269,
"step": 974
},
{
"epoch": 0.97,
"learning_rate": 1.097365438284974e-05,
"loss": 0.6069,
"step": 975
},
{
"epoch": 0.97,
"learning_rate": 1.09576107134244e-05,
"loss": 0.613,
"step": 976
},
{
"epoch": 0.97,
"learning_rate": 1.0941564555915455e-05,
"loss": 0.6226,
"step": 977
},
{
"epoch": 0.97,
"learning_rate": 1.0925515952014363e-05,
"loss": 0.5708,
"step": 978
},
{
"epoch": 0.97,
"learning_rate": 1.0909464943418926e-05,
"loss": 0.6813,
"step": 979
},
{
"epoch": 0.97,
"learning_rate": 1.0893411571833203e-05,
"loss": 0.6205,
"step": 980
},
{
"epoch": 0.98,
"learning_rate": 1.0877355878967391e-05,
"loss": 0.6243,
"step": 981
},
{
"epoch": 0.98,
"learning_rate": 1.0861297906537715e-05,
"loss": 0.6294,
"step": 982
},
{
"epoch": 0.98,
"learning_rate": 1.0845237696266326e-05,
"loss": 0.5664,
"step": 983
},
{
"epoch": 0.98,
"learning_rate": 1.0829175289881188e-05,
"loss": 0.5712,
"step": 984
},
{
"epoch": 0.98,
"learning_rate": 1.0813110729115975e-05,
"loss": 0.7026,
"step": 985
},
{
"epoch": 0.98,
"learning_rate": 1.0797044055709949e-05,
"loss": 0.6169,
"step": 986
},
{
"epoch": 0.98,
"learning_rate": 1.078097531140787e-05,
"loss": 0.6167,
"step": 987
},
{
"epoch": 0.98,
"learning_rate": 1.0764904537959875e-05,
"loss": 0.6129,
"step": 988
},
{
"epoch": 0.98,
"learning_rate": 1.074883177712138e-05,
"loss": 0.5771,
"step": 989
},
{
"epoch": 0.98,
"learning_rate": 1.073275707065295e-05,
"loss": 0.6294,
"step": 990
},
{
"epoch": 0.99,
"learning_rate": 1.0716680460320217e-05,
"loss": 0.6451,
"step": 991
},
{
"epoch": 0.99,
"learning_rate": 1.0700601987893759e-05,
"loss": 0.6639,
"step": 992
},
{
"epoch": 0.99,
"learning_rate": 1.0684521695148986e-05,
"loss": 0.6375,
"step": 993
},
{
"epoch": 0.99,
"learning_rate": 1.0668439623866043e-05,
"loss": 0.6193,
"step": 994
},
{
"epoch": 0.99,
"learning_rate": 1.0652355815829694e-05,
"loss": 0.6088,
"step": 995
},
{
"epoch": 0.99,
"learning_rate": 1.0636270312829215e-05,
"loss": 0.6115,
"step": 996
},
{
"epoch": 0.99,
"learning_rate": 1.0620183156658288e-05,
"loss": 0.6357,
"step": 997
},
{
"epoch": 0.99,
"learning_rate": 1.0604094389114887e-05,
"loss": 0.6447,
"step": 998
},
{
"epoch": 0.99,
"learning_rate": 1.0588004052001177e-05,
"loss": 0.6226,
"step": 999
},
{
"epoch": 0.99,
"learning_rate": 1.0571912187123399e-05,
"loss": 0.6733,
"step": 1000
},
{
"epoch": 1.0,
"learning_rate": 1.0555818836291759e-05,
"loss": 0.6605,
"step": 1001
},
{
"epoch": 1.0,
"learning_rate": 1.053972404132033e-05,
"loss": 0.5505,
"step": 1002
},
{
"epoch": 1.0,
"learning_rate": 1.0523627844026935e-05,
"loss": 0.587,
"step": 1003
},
{
"epoch": 1.0,
"learning_rate": 1.0507530286233042e-05,
"loss": 0.5886,
"step": 1004
},
{
"epoch": 1.0,
"learning_rate": 1.0491431409763654e-05,
"loss": 0.5763,
"step": 1005
},
{
"epoch": 1.0,
"learning_rate": 1.0475331256447195e-05,
"loss": 0.6181,
"step": 1006
},
{
"epoch": 1.0,
"learning_rate": 1.0459229868115412e-05,
"loss": 0.5827,
"step": 1007
},
{
"epoch": 1.0,
"learning_rate": 1.0443127286603256e-05,
"loss": 0.5975,
"step": 1008
},
{
"epoch": 1.0,
"learning_rate": 1.0427023553748792e-05,
"loss": 0.6164,
"step": 1009
},
{
"epoch": 1.0,
"learning_rate": 1.041091871139305e-05,
"loss": 0.5616,
"step": 1010
},
{
"epoch": 1.01,
"learning_rate": 1.0394812801379972e-05,
"loss": 0.5532,
"step": 1011
},
{
"epoch": 1.01,
"learning_rate": 1.0378705865556255e-05,
"loss": 0.5873,
"step": 1012
},
{
"epoch": 1.01,
"learning_rate": 1.0362597945771264e-05,
"loss": 0.6133,
"step": 1013
},
{
"epoch": 1.01,
"learning_rate": 1.0346489083876928e-05,
"loss": 0.5386,
"step": 1014
},
{
"epoch": 1.01,
"learning_rate": 1.0330379321727617e-05,
"loss": 0.5457,
"step": 1015
},
{
"epoch": 1.01,
"learning_rate": 1.0314268701180043e-05,
"loss": 0.5681,
"step": 1016
},
{
"epoch": 1.01,
"learning_rate": 1.0298157264093146e-05,
"loss": 0.5558,
"step": 1017
},
{
"epoch": 1.01,
"learning_rate": 1.0282045052327994e-05,
"loss": 0.5985,
"step": 1018
},
{
"epoch": 1.01,
"learning_rate": 1.0265932107747656e-05,
"loss": 0.5263,
"step": 1019
},
{
"epoch": 1.01,
"learning_rate": 1.0249818472217115e-05,
"loss": 0.5071,
"step": 1020
},
{
"epoch": 1.02,
"learning_rate": 1.0233704187603143e-05,
"loss": 0.5088,
"step": 1021
},
{
"epoch": 1.02,
"learning_rate": 1.0217589295774208e-05,
"loss": 0.5521,
"step": 1022
},
{
"epoch": 1.02,
"learning_rate": 1.0201473838600346e-05,
"loss": 0.5484,
"step": 1023
},
{
"epoch": 1.02,
"learning_rate": 1.0185357857953064e-05,
"loss": 0.6462,
"step": 1024
},
{
"epoch": 1.02,
"learning_rate": 1.0169241395705229e-05,
"loss": 0.5859,
"step": 1025
},
{
"epoch": 1.02,
"learning_rate": 1.0153124493730967e-05,
"loss": 0.566,
"step": 1026
},
{
"epoch": 1.02,
"learning_rate": 1.013700719390553e-05,
"loss": 0.5128,
"step": 1027
},
{
"epoch": 1.02,
"learning_rate": 1.0120889538105223e-05,
"loss": 0.5201,
"step": 1028
},
{
"epoch": 1.02,
"learning_rate": 1.0104771568207266e-05,
"loss": 0.5337,
"step": 1029
},
{
"epoch": 1.02,
"learning_rate": 1.0088653326089685e-05,
"loss": 0.5802,
"step": 1030
},
{
"epoch": 1.03,
"learning_rate": 1.0072534853631236e-05,
"loss": 0.5381,
"step": 1031
},
{
"epoch": 1.03,
"learning_rate": 1.0056416192711256e-05,
"loss": 0.5426,
"step": 1032
},
{
"epoch": 1.03,
"learning_rate": 1.0040297385209581e-05,
"loss": 0.4954,
"step": 1033
},
{
"epoch": 1.03,
"learning_rate": 1.0024178473006418e-05,
"loss": 0.5174,
"step": 1034
},
{
"epoch": 1.03,
"learning_rate": 1.0008059497982258e-05,
"loss": 0.5123,
"step": 1035
},
{
"epoch": 1.03,
"learning_rate": 9.991940502017745e-06,
"loss": 0.5067,
"step": 1036
},
{
"epoch": 1.03,
"learning_rate": 9.975821526993584e-06,
"loss": 0.5187,
"step": 1037
},
{
"epoch": 1.03,
"learning_rate": 9.95970261479042e-06,
"loss": 0.526,
"step": 1038
},
{
"epoch": 1.03,
"learning_rate": 9.943583807288746e-06,
"loss": 0.5253,
"step": 1039
},
{
"epoch": 1.03,
"learning_rate": 9.927465146368766e-06,
"loss": 0.4935,
"step": 1040
},
{
"epoch": 1.04,
"learning_rate": 9.911346673910318e-06,
"loss": 0.4796,
"step": 1041
},
{
"epoch": 1.04,
"learning_rate": 9.895228431792739e-06,
"loss": 0.4967,
"step": 1042
},
{
"epoch": 1.04,
"learning_rate": 9.879110461894778e-06,
"loss": 0.4785,
"step": 1043
},
{
"epoch": 1.04,
"learning_rate": 9.862992806094473e-06,
"loss": 0.5386,
"step": 1044
},
{
"epoch": 1.04,
"learning_rate": 9.846875506269038e-06,
"loss": 0.5061,
"step": 1045
},
{
"epoch": 1.04,
"learning_rate": 9.830758604294773e-06,
"loss": 0.5057,
"step": 1046
},
{
"epoch": 1.04,
"learning_rate": 9.814642142046938e-06,
"loss": 0.4852,
"step": 1047
},
{
"epoch": 1.04,
"learning_rate": 9.798526161399657e-06,
"loss": 0.4942,
"step": 1048
},
{
"epoch": 1.04,
"learning_rate": 9.782410704225793e-06,
"loss": 0.4849,
"step": 1049
},
{
"epoch": 1.04,
"learning_rate": 9.76629581239686e-06,
"loss": 0.4403,
"step": 1050
},
{
"epoch": 1.05,
"learning_rate": 9.750181527782892e-06,
"loss": 0.4734,
"step": 1051
},
{
"epoch": 1.05,
"learning_rate": 9.734067892252349e-06,
"loss": 0.4184,
"step": 1052
},
{
"epoch": 1.05,
"learning_rate": 9.71795494767201e-06,
"loss": 0.4972,
"step": 1053
},
{
"epoch": 1.05,
"learning_rate": 9.701842735906855e-06,
"loss": 0.4304,
"step": 1054
},
{
"epoch": 1.05,
"learning_rate": 9.685731298819957e-06,
"loss": 0.4736,
"step": 1055
},
{
"epoch": 1.05,
"learning_rate": 9.669620678272386e-06,
"loss": 0.4434,
"step": 1056
},
{
"epoch": 1.05,
"learning_rate": 9.653510916123074e-06,
"loss": 0.47,
"step": 1057
},
{
"epoch": 1.05,
"learning_rate": 9.637402054228739e-06,
"loss": 0.456,
"step": 1058
},
{
"epoch": 1.05,
"learning_rate": 9.621294134443747e-06,
"loss": 0.4444,
"step": 1059
},
{
"epoch": 1.05,
"learning_rate": 9.60518719862003e-06,
"loss": 0.4903,
"step": 1060
},
{
"epoch": 1.06,
"learning_rate": 9.589081288606952e-06,
"loss": 0.4554,
"step": 1061
},
{
"epoch": 1.06,
"learning_rate": 9.572976446251215e-06,
"loss": 0.4465,
"step": 1062
},
{
"epoch": 1.06,
"learning_rate": 9.556872713396746e-06,
"loss": 0.4435,
"step": 1063
},
{
"epoch": 1.06,
"learning_rate": 9.54077013188459e-06,
"loss": 0.4831,
"step": 1064
},
{
"epoch": 1.06,
"learning_rate": 9.524668743552807e-06,
"loss": 0.4435,
"step": 1065
},
{
"epoch": 1.06,
"learning_rate": 9.508568590236349e-06,
"loss": 0.4574,
"step": 1066
},
{
"epoch": 1.06,
"learning_rate": 9.492469713766961e-06,
"loss": 0.4344,
"step": 1067
},
{
"epoch": 1.06,
"learning_rate": 9.47637215597307e-06,
"loss": 0.4691,
"step": 1068
},
{
"epoch": 1.06,
"learning_rate": 9.460275958679674e-06,
"loss": 0.4686,
"step": 1069
},
{
"epoch": 1.06,
"learning_rate": 9.444181163708245e-06,
"loss": 0.5084,
"step": 1070
},
{
"epoch": 1.07,
"learning_rate": 9.428087812876604e-06,
"loss": 0.5401,
"step": 1071
},
{
"epoch": 1.07,
"learning_rate": 9.411995947998823e-06,
"loss": 0.4631,
"step": 1072
},
{
"epoch": 1.07,
"learning_rate": 9.395905610885116e-06,
"loss": 0.4554,
"step": 1073
},
{
"epoch": 1.07,
"learning_rate": 9.379816843341715e-06,
"loss": 0.4259,
"step": 1074
},
{
"epoch": 1.07,
"learning_rate": 9.363729687170787e-06,
"loss": 0.5428,
"step": 1075
},
{
"epoch": 1.07,
"learning_rate": 9.34764418417031e-06,
"loss": 0.4416,
"step": 1076
},
{
"epoch": 1.07,
"learning_rate": 9.331560376133958e-06,
"loss": 0.4454,
"step": 1077
},
{
"epoch": 1.07,
"learning_rate": 9.315478304851017e-06,
"loss": 0.4477,
"step": 1078
},
{
"epoch": 1.07,
"learning_rate": 9.299398012106246e-06,
"loss": 0.4558,
"step": 1079
},
{
"epoch": 1.07,
"learning_rate": 9.283319539679787e-06,
"loss": 0.4508,
"step": 1080
},
{
"epoch": 1.08,
"learning_rate": 9.267242929347052e-06,
"loss": 0.4171,
"step": 1081
},
{
"epoch": 1.08,
"learning_rate": 9.251168222878624e-06,
"loss": 0.4752,
"step": 1082
},
{
"epoch": 1.08,
"learning_rate": 9.235095462040125e-06,
"loss": 0.4687,
"step": 1083
},
{
"epoch": 1.08,
"learning_rate": 9.219024688592136e-06,
"loss": 0.4312,
"step": 1084
},
{
"epoch": 1.08,
"learning_rate": 9.202955944290058e-06,
"loss": 0.4642,
"step": 1085
},
{
"epoch": 1.08,
"learning_rate": 9.18688927088403e-06,
"loss": 0.4324,
"step": 1086
},
{
"epoch": 1.08,
"learning_rate": 9.170824710118814e-06,
"loss": 0.4864,
"step": 1087
},
{
"epoch": 1.08,
"learning_rate": 9.154762303733676e-06,
"loss": 0.4702,
"step": 1088
},
{
"epoch": 1.08,
"learning_rate": 9.138702093462286e-06,
"loss": 0.4753,
"step": 1089
},
{
"epoch": 1.08,
"learning_rate": 9.122644121032614e-06,
"loss": 0.5504,
"step": 1090
},
{
"epoch": 1.09,
"learning_rate": 9.106588428166799e-06,
"loss": 0.5089,
"step": 1091
},
{
"epoch": 1.09,
"learning_rate": 9.090535056581075e-06,
"loss": 0.4596,
"step": 1092
},
{
"epoch": 1.09,
"learning_rate": 9.074484047985638e-06,
"loss": 0.4191,
"step": 1093
},
{
"epoch": 1.09,
"learning_rate": 9.058435444084543e-06,
"loss": 0.4464,
"step": 1094
},
{
"epoch": 1.09,
"learning_rate": 9.042389286575603e-06,
"loss": 0.4733,
"step": 1095
},
{
"epoch": 1.09,
"learning_rate": 9.026345617150265e-06,
"loss": 0.4598,
"step": 1096
},
{
"epoch": 1.09,
"learning_rate": 9.01030447749352e-06,
"loss": 0.4918,
"step": 1097
},
{
"epoch": 1.09,
"learning_rate": 8.994265909283776e-06,
"loss": 0.4966,
"step": 1098
},
{
"epoch": 1.09,
"learning_rate": 8.978229954192775e-06,
"loss": 0.4567,
"step": 1099
},
{
"epoch": 1.09,
"learning_rate": 8.962196653885459e-06,
"loss": 0.4587,
"step": 1100
},
{
"epoch": 1.09,
"learning_rate": 8.946166050019875e-06,
"loss": 0.6759,
"step": 1101
},
{
"epoch": 1.1,
"learning_rate": 8.93013818424706e-06,
"loss": 0.5431,
"step": 1102
},
{
"epoch": 1.1,
"learning_rate": 8.914113098210941e-06,
"loss": 0.4956,
"step": 1103
},
{
"epoch": 1.1,
"learning_rate": 8.898090833548226e-06,
"loss": 0.421,
"step": 1104
},
{
"epoch": 1.1,
"learning_rate": 8.882071431888286e-06,
"loss": 0.472,
"step": 1105
},
{
"epoch": 1.1,
"learning_rate": 8.866054934853053e-06,
"loss": 0.4715,
"step": 1106
},
{
"epoch": 1.1,
"learning_rate": 8.850041384056924e-06,
"loss": 0.4357,
"step": 1107
},
{
"epoch": 1.1,
"learning_rate": 8.83403082110662e-06,
"loss": 0.4281,
"step": 1108
},
{
"epoch": 1.1,
"learning_rate": 8.818023287601117e-06,
"loss": 0.4434,
"step": 1109
},
{
"epoch": 1.1,
"learning_rate": 8.802018825131513e-06,
"loss": 0.4632,
"step": 1110
},
{
"epoch": 1.1,
"learning_rate": 8.786017475280927e-06,
"loss": 0.4511,
"step": 1111
},
{
"epoch": 1.11,
"learning_rate": 8.770019279624398e-06,
"loss": 0.4479,
"step": 1112
},
{
"epoch": 1.11,
"learning_rate": 8.754024279728748e-06,
"loss": 0.4924,
"step": 1113
},
{
"epoch": 1.11,
"learning_rate": 8.738032517152523e-06,
"loss": 0.4408,
"step": 1114
},
{
"epoch": 1.11,
"learning_rate": 8.72204403344584e-06,
"loss": 0.5394,
"step": 1115
},
{
"epoch": 1.11,
"learning_rate": 8.7060588701503e-06,
"loss": 0.5221,
"step": 1116
},
{
"epoch": 1.11,
"learning_rate": 8.690077068798886e-06,
"loss": 0.3811,
"step": 1117
},
{
"epoch": 1.11,
"learning_rate": 8.674098670915838e-06,
"loss": 0.4497,
"step": 1118
},
{
"epoch": 1.11,
"learning_rate": 8.658123718016548e-06,
"loss": 0.4778,
"step": 1119
},
{
"epoch": 1.11,
"learning_rate": 8.642152251607465e-06,
"loss": 0.5056,
"step": 1120
},
{
"epoch": 1.11,
"learning_rate": 8.626184313185979e-06,
"loss": 0.4521,
"step": 1121
},
{
"epoch": 1.12,
"learning_rate": 8.610219944240313e-06,
"loss": 0.4424,
"step": 1122
},
{
"epoch": 1.12,
"learning_rate": 8.594259186249409e-06,
"loss": 0.4574,
"step": 1123
},
{
"epoch": 1.12,
"learning_rate": 8.578302080682844e-06,
"loss": 0.5127,
"step": 1124
},
{
"epoch": 1.12,
"learning_rate": 8.56234866900068e-06,
"loss": 0.4494,
"step": 1125
},
{
"epoch": 1.12,
"learning_rate": 8.546398992653403e-06,
"loss": 0.5,
"step": 1126
},
{
"epoch": 1.12,
"learning_rate": 8.530453093081784e-06,
"loss": 0.4865,
"step": 1127
},
{
"epoch": 1.12,
"learning_rate": 8.514511011716783e-06,
"loss": 0.4455,
"step": 1128
},
{
"epoch": 1.12,
"learning_rate": 8.498572789979446e-06,
"loss": 0.4628,
"step": 1129
},
{
"epoch": 1.12,
"learning_rate": 8.482638469280772e-06,
"loss": 0.4743,
"step": 1130
},
{
"epoch": 1.12,
"learning_rate": 8.466708091021645e-06,
"loss": 0.4697,
"step": 1131
},
{
"epoch": 1.13,
"learning_rate": 8.450781696592696e-06,
"loss": 0.4999,
"step": 1132
},
{
"epoch": 1.13,
"learning_rate": 8.434859327374205e-06,
"loss": 0.4862,
"step": 1133
},
{
"epoch": 1.13,
"learning_rate": 8.418941024735997e-06,
"loss": 0.4133,
"step": 1134
},
{
"epoch": 1.13,
"learning_rate": 8.403026830037331e-06,
"loss": 0.4194,
"step": 1135
},
{
"epoch": 1.13,
"learning_rate": 8.387116784626785e-06,
"loss": 0.4846,
"step": 1136
},
{
"epoch": 1.13,
"learning_rate": 8.371210929842166e-06,
"loss": 0.4354,
"step": 1137
},
{
"epoch": 1.13,
"learning_rate": 8.355309307010386e-06,
"loss": 0.4425,
"step": 1138
},
{
"epoch": 1.13,
"learning_rate": 8.33941195744737e-06,
"loss": 0.4387,
"step": 1139
},
{
"epoch": 1.13,
"learning_rate": 8.32351892245793e-06,
"loss": 0.4361,
"step": 1140
},
{
"epoch": 1.13,
"learning_rate": 8.307630243335676e-06,
"loss": 0.4334,
"step": 1141
},
{
"epoch": 1.14,
"learning_rate": 8.29174596136289e-06,
"loss": 0.4852,
"step": 1142
},
{
"epoch": 1.14,
"learning_rate": 8.27586611781044e-06,
"loss": 0.465,
"step": 1143
},
{
"epoch": 1.14,
"learning_rate": 8.259990753937662e-06,
"loss": 0.4546,
"step": 1144
},
{
"epoch": 1.14,
"learning_rate": 8.24411991099224e-06,
"loss": 0.4274,
"step": 1145
},
{
"epoch": 1.14,
"learning_rate": 8.228253630210133e-06,
"loss": 0.4261,
"step": 1146
},
{
"epoch": 1.14,
"learning_rate": 8.212391952815419e-06,
"loss": 0.4052,
"step": 1147
},
{
"epoch": 1.14,
"learning_rate": 8.196534920020241e-06,
"loss": 0.4525,
"step": 1148
},
{
"epoch": 1.14,
"learning_rate": 8.18068257302466e-06,
"loss": 0.4151,
"step": 1149
},
{
"epoch": 1.14,
"learning_rate": 8.164834953016566e-06,
"loss": 0.4807,
"step": 1150
},
{
"epoch": 1.14,
"learning_rate": 8.148992101171566e-06,
"loss": 0.543,
"step": 1151
},
{
"epoch": 1.15,
"learning_rate": 8.133154058652887e-06,
"loss": 0.4785,
"step": 1152
},
{
"epoch": 1.15,
"learning_rate": 8.117320866611242e-06,
"loss": 0.3805,
"step": 1153
},
{
"epoch": 1.15,
"learning_rate": 8.101492566184757e-06,
"loss": 0.4612,
"step": 1154
},
{
"epoch": 1.15,
"learning_rate": 8.085669198498842e-06,
"loss": 0.4322,
"step": 1155
},
{
"epoch": 1.15,
"learning_rate": 8.069850804666096e-06,
"loss": 0.4129,
"step": 1156
},
{
"epoch": 1.15,
"learning_rate": 8.054037425786189e-06,
"loss": 0.456,
"step": 1157
},
{
"epoch": 1.15,
"learning_rate": 8.038229102945763e-06,
"loss": 0.4281,
"step": 1158
},
{
"epoch": 1.15,
"learning_rate": 8.022425877218321e-06,
"loss": 0.4815,
"step": 1159
},
{
"epoch": 1.15,
"learning_rate": 8.00662778966413e-06,
"loss": 0.4849,
"step": 1160
},
{
"epoch": 1.15,
"learning_rate": 7.990834881330098e-06,
"loss": 0.4722,
"step": 1161
},
{
"epoch": 1.16,
"learning_rate": 7.97504719324968e-06,
"loss": 0.4204,
"step": 1162
},
{
"epoch": 1.16,
"learning_rate": 7.959264766442775e-06,
"loss": 0.4397,
"step": 1163
},
{
"epoch": 1.16,
"learning_rate": 7.943487641915595e-06,
"loss": 0.4367,
"step": 1164
},
{
"epoch": 1.16,
"learning_rate": 7.927715860660589e-06,
"loss": 0.4946,
"step": 1165
},
{
"epoch": 1.16,
"learning_rate": 7.911949463656322e-06,
"loss": 0.5255,
"step": 1166
},
{
"epoch": 1.16,
"learning_rate": 7.896188491867361e-06,
"loss": 0.5271,
"step": 1167
},
{
"epoch": 1.16,
"learning_rate": 7.88043298624419e-06,
"loss": 0.45,
"step": 1168
},
{
"epoch": 1.16,
"learning_rate": 7.864682987723082e-06,
"loss": 0.462,
"step": 1169
},
{
"epoch": 1.16,
"learning_rate": 7.848938537226001e-06,
"loss": 0.3939,
"step": 1170
},
{
"epoch": 1.16,
"learning_rate": 7.833199675660496e-06,
"loss": 0.4968,
"step": 1171
},
{
"epoch": 1.17,
"learning_rate": 7.8174664439196e-06,
"loss": 0.4905,
"step": 1172
},
{
"epoch": 1.17,
"learning_rate": 7.801738882881715e-06,
"loss": 0.4115,
"step": 1173
},
{
"epoch": 1.17,
"learning_rate": 7.78601703341051e-06,
"loss": 0.4309,
"step": 1174
},
{
"epoch": 1.17,
"learning_rate": 7.770300936354814e-06,
"loss": 0.4585,
"step": 1175
},
{
"epoch": 1.17,
"learning_rate": 7.754590632548506e-06,
"loss": 0.4238,
"step": 1176
},
{
"epoch": 1.17,
"learning_rate": 7.738886162810417e-06,
"loss": 0.4687,
"step": 1177
},
{
"epoch": 1.17,
"learning_rate": 7.723187567944223e-06,
"loss": 0.4527,
"step": 1178
},
{
"epoch": 1.17,
"learning_rate": 7.70749488873833e-06,
"loss": 0.4223,
"step": 1179
},
{
"epoch": 1.17,
"learning_rate": 7.691808165965776e-06,
"loss": 0.4414,
"step": 1180
},
{
"epoch": 1.17,
"learning_rate": 7.67612744038412e-06,
"loss": 0.4325,
"step": 1181
},
{
"epoch": 1.18,
"learning_rate": 7.660452752735345e-06,
"loss": 0.4703,
"step": 1182
},
{
"epoch": 1.18,
"learning_rate": 7.644784143745743e-06,
"loss": 0.3923,
"step": 1183
},
{
"epoch": 1.18,
"learning_rate": 7.629121654125808e-06,
"loss": 0.4444,
"step": 1184
},
{
"epoch": 1.18,
"learning_rate": 7.613465324570144e-06,
"loss": 0.601,
"step": 1185
},
{
"epoch": 1.18,
"learning_rate": 7.597815195757343e-06,
"loss": 0.5431,
"step": 1186
},
{
"epoch": 1.18,
"learning_rate": 7.582171308349886e-06,
"loss": 0.4524,
"step": 1187
},
{
"epoch": 1.18,
"learning_rate": 7.566533702994038e-06,
"loss": 0.4335,
"step": 1188
},
{
"epoch": 1.18,
"learning_rate": 7.550902420319742e-06,
"loss": 0.4856,
"step": 1189
},
{
"epoch": 1.18,
"learning_rate": 7.53527750094052e-06,
"loss": 0.4863,
"step": 1190
},
{
"epoch": 1.18,
"learning_rate": 7.519658985453351e-06,
"loss": 0.48,
"step": 1191
},
{
"epoch": 1.19,
"learning_rate": 7.5040469144385795e-06,
"loss": 0.4425,
"step": 1192
},
{
"epoch": 1.19,
"learning_rate": 7.488441328459806e-06,
"loss": 0.4289,
"step": 1193
},
{
"epoch": 1.19,
"learning_rate": 7.472842268063776e-06,
"loss": 0.4446,
"step": 1194
},
{
"epoch": 1.19,
"learning_rate": 7.457249773780292e-06,
"loss": 0.4995,
"step": 1195
},
{
"epoch": 1.19,
"learning_rate": 7.441663886122086e-06,
"loss": 0.4633,
"step": 1196
},
{
"epoch": 1.19,
"learning_rate": 7.4260846455847314e-06,
"loss": 0.4416,
"step": 1197
},
{
"epoch": 1.19,
"learning_rate": 7.41051209264652e-06,
"loss": 0.4072,
"step": 1198
},
{
"epoch": 1.19,
"learning_rate": 7.394946267768381e-06,
"loss": 0.4066,
"step": 1199
},
{
"epoch": 1.19,
"learning_rate": 7.379387211393756e-06,
"loss": 0.4205,
"step": 1200
},
{
"epoch": 1.19,
"learning_rate": 7.363834963948499e-06,
"loss": 0.4264,
"step": 1201
},
{
"epoch": 1.2,
"learning_rate": 7.348289565840777e-06,
"loss": 0.4995,
"step": 1202
},
{
"epoch": 1.2,
"learning_rate": 7.332751057460961e-06,
"loss": 0.4385,
"step": 1203
},
{
"epoch": 1.2,
"learning_rate": 7.317219479181517e-06,
"loss": 0.4439,
"step": 1204
},
{
"epoch": 1.2,
"learning_rate": 7.3016948713569046e-06,
"loss": 0.4277,
"step": 1205
},
{
"epoch": 1.2,
"learning_rate": 7.286177274323476e-06,
"loss": 0.4873,
"step": 1206
},
{
"epoch": 1.2,
"learning_rate": 7.27066672839937e-06,
"loss": 0.4533,
"step": 1207
},
{
"epoch": 1.2,
"learning_rate": 7.255163273884401e-06,
"loss": 0.4733,
"step": 1208
},
{
"epoch": 1.2,
"learning_rate": 7.23966695105996e-06,
"loss": 0.5394,
"step": 1209
},
{
"epoch": 1.2,
"learning_rate": 7.224177800188906e-06,
"loss": 0.4487,
"step": 1210
},
{
"epoch": 1.2,
"learning_rate": 7.208695861515462e-06,
"loss": 0.4575,
"step": 1211
},
{
"epoch": 1.21,
"learning_rate": 7.193221175265124e-06,
"loss": 0.522,
"step": 1212
},
{
"epoch": 1.21,
"learning_rate": 7.1777537816445295e-06,
"loss": 0.4806,
"step": 1213
},
{
"epoch": 1.21,
"learning_rate": 7.162293720841378e-06,
"loss": 0.443,
"step": 1214
},
{
"epoch": 1.21,
"learning_rate": 7.146841033024312e-06,
"loss": 0.4517,
"step": 1215
},
{
"epoch": 1.21,
"learning_rate": 7.131395758342815e-06,
"loss": 0.3867,
"step": 1216
},
{
"epoch": 1.21,
"learning_rate": 7.11595793692712e-06,
"loss": 0.4784,
"step": 1217
},
{
"epoch": 1.21,
"learning_rate": 7.1005276088880855e-06,
"loss": 0.5366,
"step": 1218
},
{
"epoch": 1.21,
"learning_rate": 7.085104814317101e-06,
"loss": 0.4215,
"step": 1219
},
{
"epoch": 1.21,
"learning_rate": 7.069689593285991e-06,
"loss": 0.4384,
"step": 1220
},
{
"epoch": 1.21,
"learning_rate": 7.0542819858468895e-06,
"loss": 0.4139,
"step": 1221
},
{
"epoch": 1.22,
"learning_rate": 7.038882032032157e-06,
"loss": 0.4231,
"step": 1222
},
{
"epoch": 1.22,
"learning_rate": 7.023489771854264e-06,
"loss": 0.4137,
"step": 1223
},
{
"epoch": 1.22,
"learning_rate": 7.008105245305699e-06,
"loss": 0.4484,
"step": 1224
},
{
"epoch": 1.22,
"learning_rate": 6.992728492358847e-06,
"loss": 0.4556,
"step": 1225
},
{
"epoch": 1.22,
"learning_rate": 6.977359552965903e-06,
"loss": 0.4686,
"step": 1226
},
{
"epoch": 1.22,
"learning_rate": 6.961998467058753e-06,
"loss": 0.4852,
"step": 1227
},
{
"epoch": 1.22,
"learning_rate": 6.9466452745488835e-06,
"loss": 0.4396,
"step": 1228
},
{
"epoch": 1.22,
"learning_rate": 6.931300015327274e-06,
"loss": 0.4397,
"step": 1229
},
{
"epoch": 1.22,
"learning_rate": 6.915962729264287e-06,
"loss": 0.4521,
"step": 1230
},
{
"epoch": 1.22,
"learning_rate": 6.900633456209574e-06,
"loss": 0.42,
"step": 1231
},
{
"epoch": 1.23,
"learning_rate": 6.8853122359919565e-06,
"loss": 0.4065,
"step": 1232
},
{
"epoch": 1.23,
"learning_rate": 6.869999108419343e-06,
"loss": 0.3824,
"step": 1233
},
{
"epoch": 1.23,
"learning_rate": 6.854694113278614e-06,
"loss": 0.4728,
"step": 1234
},
{
"epoch": 1.23,
"learning_rate": 6.8393972903355185e-06,
"loss": 0.4246,
"step": 1235
},
{
"epoch": 1.23,
"learning_rate": 6.8241086793345715e-06,
"loss": 0.3887,
"step": 1236
},
{
"epoch": 1.23,
"learning_rate": 6.808828319998957e-06,
"loss": 0.4785,
"step": 1237
},
{
"epoch": 1.23,
"learning_rate": 6.7935562520304065e-06,
"loss": 0.4461,
"step": 1238
},
{
"epoch": 1.23,
"learning_rate": 6.7782925151091224e-06,
"loss": 0.4633,
"step": 1239
},
{
"epoch": 1.23,
"learning_rate": 6.763037148893656e-06,
"loss": 0.419,
"step": 1240
},
{
"epoch": 1.23,
"learning_rate": 6.747790193020808e-06,
"loss": 0.4209,
"step": 1241
},
{
"epoch": 1.24,
"learning_rate": 6.7325516871055305e-06,
"loss": 0.3817,
"step": 1242
},
{
"epoch": 1.24,
"learning_rate": 6.717321670740822e-06,
"loss": 0.4239,
"step": 1243
},
{
"epoch": 1.24,
"learning_rate": 6.702100183497613e-06,
"loss": 0.4402,
"step": 1244
},
{
"epoch": 1.24,
"learning_rate": 6.686887264924682e-06,
"loss": 0.4443,
"step": 1245
},
{
"epoch": 1.24,
"learning_rate": 6.6716829545485485e-06,
"loss": 0.4481,
"step": 1246
},
{
"epoch": 1.24,
"learning_rate": 6.656487291873357e-06,
"loss": 0.4729,
"step": 1247
},
{
"epoch": 1.24,
"learning_rate": 6.64130031638079e-06,
"loss": 0.4976,
"step": 1248
},
{
"epoch": 1.24,
"learning_rate": 6.62612206752995e-06,
"loss": 0.4698,
"step": 1249
},
{
"epoch": 1.24,
"learning_rate": 6.610952584757273e-06,
"loss": 0.4696,
"step": 1250
},
{
"epoch": 1.24,
"learning_rate": 6.59579190747642e-06,
"loss": 0.4466,
"step": 1251
},
{
"epoch": 1.25,
"learning_rate": 6.580640075078169e-06,
"loss": 0.4938,
"step": 1252
},
{
"epoch": 1.25,
"learning_rate": 6.565497126930318e-06,
"loss": 0.4727,
"step": 1253
},
{
"epoch": 1.25,
"learning_rate": 6.550363102377588e-06,
"loss": 0.4173,
"step": 1254
},
{
"epoch": 1.25,
"learning_rate": 6.535238040741498e-06,
"loss": 0.3832,
"step": 1255
},
{
"epoch": 1.25,
"learning_rate": 6.5201219813203e-06,
"loss": 0.411,
"step": 1256
},
{
"epoch": 1.25,
"learning_rate": 6.505014963388843e-06,
"loss": 0.4297,
"step": 1257
},
{
"epoch": 1.25,
"learning_rate": 6.4899170261984844e-06,
"loss": 0.4091,
"step": 1258
},
{
"epoch": 1.25,
"learning_rate": 6.474828208976998e-06,
"loss": 0.347,
"step": 1259
},
{
"epoch": 1.25,
"learning_rate": 6.459748550928454e-06,
"loss": 0.4239,
"step": 1260
},
{
"epoch": 1.25,
"learning_rate": 6.444678091233122e-06,
"loss": 0.4469,
"step": 1261
},
{
"epoch": 1.26,
"learning_rate": 6.429616869047376e-06,
"loss": 0.4475,
"step": 1262
},
{
"epoch": 1.26,
"learning_rate": 6.4145649235035944e-06,
"loss": 0.4572,
"step": 1263
},
{
"epoch": 1.26,
"learning_rate": 6.3995222937100455e-06,
"loss": 0.4116,
"step": 1264
},
{
"epoch": 1.26,
"learning_rate": 6.384489018750797e-06,
"loss": 0.4882,
"step": 1265
},
{
"epoch": 1.26,
"learning_rate": 6.369465137685604e-06,
"loss": 0.4142,
"step": 1266
},
{
"epoch": 1.26,
"learning_rate": 6.354450689549823e-06,
"loss": 0.4881,
"step": 1267
},
{
"epoch": 1.26,
"learning_rate": 6.339445713354299e-06,
"loss": 0.4133,
"step": 1268
},
{
"epoch": 1.26,
"learning_rate": 6.324450248085265e-06,
"loss": 0.4634,
"step": 1269
},
{
"epoch": 1.26,
"learning_rate": 6.309464332704243e-06,
"loss": 0.4565,
"step": 1270
},
{
"epoch": 1.26,
"learning_rate": 6.294488006147947e-06,
"loss": 0.4402,
"step": 1271
},
{
"epoch": 1.27,
"learning_rate": 6.279521307328163e-06,
"loss": 0.4717,
"step": 1272
},
{
"epoch": 1.27,
"learning_rate": 6.264564275131679e-06,
"loss": 0.3883,
"step": 1273
},
{
"epoch": 1.27,
"learning_rate": 6.249616948420161e-06,
"loss": 0.4883,
"step": 1274
},
{
"epoch": 1.27,
"learning_rate": 6.23467936603005e-06,
"loss": 0.5146,
"step": 1275
},
{
"epoch": 1.27,
"learning_rate": 6.219751566772483e-06,
"loss": 0.382,
"step": 1276
},
{
"epoch": 1.27,
"learning_rate": 6.20483358943317e-06,
"loss": 0.5157,
"step": 1277
},
{
"epoch": 1.27,
"learning_rate": 6.1899254727723e-06,
"loss": 0.5106,
"step": 1278
},
{
"epoch": 1.27,
"learning_rate": 6.175027255524446e-06,
"loss": 0.3965,
"step": 1279
},
{
"epoch": 1.27,
"learning_rate": 6.160138976398456e-06,
"loss": 0.4031,
"step": 1280
},
{
"epoch": 1.27,
"learning_rate": 6.145260674077363e-06,
"loss": 0.4331,
"step": 1281
},
{
"epoch": 1.27,
"learning_rate": 6.130392387218274e-06,
"loss": 0.4464,
"step": 1282
},
{
"epoch": 1.28,
"learning_rate": 6.115534154452269e-06,
"loss": 0.4845,
"step": 1283
},
{
"epoch": 1.28,
"learning_rate": 6.100686014384315e-06,
"loss": 0.4564,
"step": 1284
},
{
"epoch": 1.28,
"learning_rate": 6.08584800559315e-06,
"loss": 0.4286,
"step": 1285
},
{
"epoch": 1.28,
"learning_rate": 6.07102016663119e-06,
"loss": 0.4857,
"step": 1286
},
{
"epoch": 1.28,
"learning_rate": 6.056202536024425e-06,
"loss": 0.4496,
"step": 1287
},
{
"epoch": 1.28,
"learning_rate": 6.041395152272331e-06,
"loss": 0.4527,
"step": 1288
},
{
"epoch": 1.28,
"learning_rate": 6.026598053847743e-06,
"loss": 0.5159,
"step": 1289
},
{
"epoch": 1.28,
"learning_rate": 6.011811279196788e-06,
"loss": 0.5407,
"step": 1290
},
{
"epoch": 1.28,
"learning_rate": 5.997034866738765e-06,
"loss": 0.4741,
"step": 1291
},
{
"epoch": 1.28,
"learning_rate": 5.982268854866045e-06,
"loss": 0.4186,
"step": 1292
},
{
"epoch": 1.29,
"learning_rate": 5.967513281943984e-06,
"loss": 0.4449,
"step": 1293
},
{
"epoch": 1.29,
"learning_rate": 5.952768186310813e-06,
"loss": 0.3943,
"step": 1294
},
{
"epoch": 1.29,
"learning_rate": 5.938033606277534e-06,
"loss": 0.4286,
"step": 1295
},
{
"epoch": 1.29,
"learning_rate": 5.923309580127832e-06,
"loss": 0.4185,
"step": 1296
},
{
"epoch": 1.29,
"learning_rate": 5.908596146117971e-06,
"loss": 0.4808,
"step": 1297
},
{
"epoch": 1.29,
"learning_rate": 5.8938933424766985e-06,
"loss": 0.4532,
"step": 1298
},
{
"epoch": 1.29,
"learning_rate": 5.879201207405136e-06,
"loss": 0.4193,
"step": 1299
},
{
"epoch": 1.29,
"learning_rate": 5.864519779076685e-06,
"loss": 0.4765,
"step": 1300
},
{
"epoch": 1.29,
"learning_rate": 5.84984909563693e-06,
"loss": 0.4474,
"step": 1301
},
{
"epoch": 1.29,
"learning_rate": 5.8351891952035415e-06,
"loss": 0.4204,
"step": 1302
},
{
"epoch": 1.3,
"learning_rate": 5.820540115866164e-06,
"loss": 0.4322,
"step": 1303
},
{
"epoch": 1.3,
"learning_rate": 5.805901895686344e-06,
"loss": 0.4613,
"step": 1304
},
{
"epoch": 1.3,
"learning_rate": 5.791274572697401e-06,
"loss": 0.3923,
"step": 1305
},
{
"epoch": 1.3,
"learning_rate": 5.776658184904334e-06,
"loss": 0.4037,
"step": 1306
},
{
"epoch": 1.3,
"learning_rate": 5.7620527702837415e-06,
"loss": 0.4529,
"step": 1307
},
{
"epoch": 1.3,
"learning_rate": 5.747458366783707e-06,
"loss": 0.4309,
"step": 1308
},
{
"epoch": 1.3,
"learning_rate": 5.732875012323712e-06,
"loss": 0.4296,
"step": 1309
},
{
"epoch": 1.3,
"learning_rate": 5.718302744794522e-06,
"loss": 0.398,
"step": 1310
},
{
"epoch": 1.3,
"learning_rate": 5.7037416020581014e-06,
"loss": 0.4507,
"step": 1311
},
{
"epoch": 1.3,
"learning_rate": 5.689191621947495e-06,
"loss": 0.4555,
"step": 1312
},
{
"epoch": 1.31,
"learning_rate": 5.67465284226677e-06,
"loss": 0.4147,
"step": 1313
},
{
"epoch": 1.31,
"learning_rate": 5.660125300790873e-06,
"loss": 0.3885,
"step": 1314
},
{
"epoch": 1.31,
"learning_rate": 5.645609035265558e-06,
"loss": 0.4455,
"step": 1315
},
{
"epoch": 1.31,
"learning_rate": 5.631104083407285e-06,
"loss": 0.4349,
"step": 1316
},
{
"epoch": 1.31,
"learning_rate": 5.616610482903102e-06,
"loss": 0.4201,
"step": 1317
},
{
"epoch": 1.31,
"learning_rate": 5.60212827141059e-06,
"loss": 0.4361,
"step": 1318
},
{
"epoch": 1.31,
"learning_rate": 5.58765748655772e-06,
"loss": 0.4301,
"step": 1319
},
{
"epoch": 1.31,
"learning_rate": 5.5731981659427785e-06,
"loss": 0.4615,
"step": 1320
},
{
"epoch": 1.31,
"learning_rate": 5.558750347134265e-06,
"loss": 0.4035,
"step": 1321
},
{
"epoch": 1.31,
"learning_rate": 5.544314067670798e-06,
"loss": 0.4512,
"step": 1322
},
{
"epoch": 1.32,
"learning_rate": 5.529889365061012e-06,
"loss": 0.4578,
"step": 1323
},
{
"epoch": 1.32,
"learning_rate": 5.5154762767834605e-06,
"loss": 0.4028,
"step": 1324
},
{
"epoch": 1.32,
"learning_rate": 5.501074840286523e-06,
"loss": 0.3962,
"step": 1325
},
{
"epoch": 1.32,
"learning_rate": 5.4866850929883045e-06,
"loss": 0.4219,
"step": 1326
},
{
"epoch": 1.32,
"learning_rate": 5.472307072276539e-06,
"loss": 0.4342,
"step": 1327
},
{
"epoch": 1.32,
"learning_rate": 5.457940815508491e-06,
"loss": 0.4261,
"step": 1328
},
{
"epoch": 1.32,
"learning_rate": 5.443586360010859e-06,
"loss": 0.4099,
"step": 1329
},
{
"epoch": 1.32,
"learning_rate": 5.429243743079686e-06,
"loss": 0.4589,
"step": 1330
},
{
"epoch": 1.32,
"learning_rate": 5.414913001980246e-06,
"loss": 0.4234,
"step": 1331
},
{
"epoch": 1.32,
"learning_rate": 5.400594173946963e-06,
"loss": 0.439,
"step": 1332
},
{
"epoch": 1.33,
"learning_rate": 5.3862872961833065e-06,
"loss": 0.3922,
"step": 1333
},
{
"epoch": 1.33,
"learning_rate": 5.3719924058616975e-06,
"loss": 0.3905,
"step": 1334
},
{
"epoch": 1.33,
"learning_rate": 5.35770954012341e-06,
"loss": 0.4081,
"step": 1335
},
{
"epoch": 1.33,
"learning_rate": 5.343438736078475e-06,
"loss": 0.4215,
"step": 1336
},
{
"epoch": 1.33,
"learning_rate": 5.329180030805584e-06,
"loss": 0.4213,
"step": 1337
},
{
"epoch": 1.33,
"learning_rate": 5.314933461352e-06,
"loss": 0.3818,
"step": 1338
},
{
"epoch": 1.33,
"learning_rate": 5.30069906473345e-06,
"loss": 0.3387,
"step": 1339
},
{
"epoch": 1.33,
"learning_rate": 5.286476877934023e-06,
"loss": 0.3873,
"step": 1340
},
{
"epoch": 1.33,
"learning_rate": 5.2722669379061e-06,
"loss": 0.4715,
"step": 1341
},
{
"epoch": 1.33,
"learning_rate": 5.258069281570231e-06,
"loss": 0.4677,
"step": 1342
},
{
"epoch": 1.34,
"learning_rate": 5.243883945815063e-06,
"loss": 0.4222,
"step": 1343
},
{
"epoch": 1.34,
"learning_rate": 5.2297109674972166e-06,
"loss": 0.4828,
"step": 1344
},
{
"epoch": 1.34,
"learning_rate": 5.215550383441221e-06,
"loss": 0.443,
"step": 1345
},
{
"epoch": 1.34,
"learning_rate": 5.201402230439381e-06,
"loss": 0.4672,
"step": 1346
},
{
"epoch": 1.34,
"learning_rate": 5.187266545251719e-06,
"loss": 0.447,
"step": 1347
},
{
"epoch": 1.34,
"learning_rate": 5.173143364605864e-06,
"loss": 0.6682,
"step": 1348
},
{
"epoch": 1.34,
"learning_rate": 5.159032725196946e-06,
"loss": 0.4944,
"step": 1349
},
{
"epoch": 1.34,
"learning_rate": 5.144934663687523e-06,
"loss": 0.3803,
"step": 1350
},
{
"epoch": 1.34,
"learning_rate": 5.1308492167074545e-06,
"loss": 0.4179,
"step": 1351
},
{
"epoch": 1.34,
"learning_rate": 5.116776420853834e-06,
"loss": 0.437,
"step": 1352
},
{
"epoch": 1.35,
"learning_rate": 5.102716312690895e-06,
"loss": 0.4987,
"step": 1353
},
{
"epoch": 1.35,
"learning_rate": 5.088668928749891e-06,
"loss": 0.3789,
"step": 1354
},
{
"epoch": 1.35,
"learning_rate": 5.074634305529021e-06,
"loss": 0.4428,
"step": 1355
},
{
"epoch": 1.35,
"learning_rate": 5.060612479493328e-06,
"loss": 0.4766,
"step": 1356
},
{
"epoch": 1.35,
"learning_rate": 5.046603487074605e-06,
"loss": 0.4213,
"step": 1357
},
{
"epoch": 1.35,
"learning_rate": 5.032607364671301e-06,
"loss": 0.3667,
"step": 1358
},
{
"epoch": 1.35,
"learning_rate": 5.0186241486484245e-06,
"loss": 0.4147,
"step": 1359
},
{
"epoch": 1.35,
"learning_rate": 5.004653875337452e-06,
"loss": 0.3781,
"step": 1360
},
{
"epoch": 1.35,
"learning_rate": 4.990696581036231e-06,
"loss": 0.4185,
"step": 1361
},
{
"epoch": 1.35,
"learning_rate": 4.976752302008888e-06,
"loss": 0.4101,
"step": 1362
},
{
"epoch": 1.36,
"learning_rate": 4.962821074485731e-06,
"loss": 0.3671,
"step": 1363
},
{
"epoch": 1.36,
"learning_rate": 4.948902934663158e-06,
"loss": 0.4203,
"step": 1364
},
{
"epoch": 1.36,
"learning_rate": 4.934997918703564e-06,
"loss": 0.4502,
"step": 1365
},
{
"epoch": 1.36,
"learning_rate": 4.921106062735241e-06,
"loss": 0.3939,
"step": 1366
},
{
"epoch": 1.36,
"learning_rate": 4.907227402852296e-06,
"loss": 0.3998,
"step": 1367
},
{
"epoch": 1.36,
"learning_rate": 4.89336197511454e-06,
"loss": 0.4318,
"step": 1368
},
{
"epoch": 1.36,
"learning_rate": 4.879509815547413e-06,
"loss": 0.3551,
"step": 1369
},
{
"epoch": 1.36,
"learning_rate": 4.865670960141874e-06,
"loss": 0.4126,
"step": 1370
},
{
"epoch": 1.36,
"learning_rate": 4.851845444854321e-06,
"loss": 0.4198,
"step": 1371
},
{
"epoch": 1.36,
"learning_rate": 4.8380333056064825e-06,
"loss": 0.4359,
"step": 1372
},
{
"epoch": 1.37,
"learning_rate": 4.824234578285352e-06,
"loss": 0.433,
"step": 1373
},
{
"epoch": 1.37,
"learning_rate": 4.810449298743051e-06,
"loss": 0.4012,
"step": 1374
},
{
"epoch": 1.37,
"learning_rate": 4.796677502796776e-06,
"loss": 0.4082,
"step": 1375
},
{
"epoch": 1.37,
"learning_rate": 4.782919226228685e-06,
"loss": 0.4195,
"step": 1376
},
{
"epoch": 1.37,
"learning_rate": 4.769174504785818e-06,
"loss": 0.4044,
"step": 1377
},
{
"epoch": 1.37,
"learning_rate": 4.7554433741799854e-06,
"loss": 0.4191,
"step": 1378
},
{
"epoch": 1.37,
"learning_rate": 4.741725870087693e-06,
"loss": 0.4289,
"step": 1379
},
{
"epoch": 1.37,
"learning_rate": 4.728022028150033e-06,
"loss": 0.4506,
"step": 1380
},
{
"epoch": 1.37,
"learning_rate": 4.7143318839726035e-06,
"loss": 0.4235,
"step": 1381
},
{
"epoch": 1.37,
"learning_rate": 4.700655473125425e-06,
"loss": 0.4138,
"step": 1382
},
{
"epoch": 1.38,
"learning_rate": 4.686992831142819e-06,
"loss": 0.4137,
"step": 1383
},
{
"epoch": 1.38,
"learning_rate": 4.673343993523347e-06,
"loss": 0.3904,
"step": 1384
},
{
"epoch": 1.38,
"learning_rate": 4.659708995729685e-06,
"loss": 0.4351,
"step": 1385
},
{
"epoch": 1.38,
"learning_rate": 4.646087873188563e-06,
"loss": 0.4152,
"step": 1386
},
{
"epoch": 1.38,
"learning_rate": 4.6324806612906654e-06,
"loss": 0.3547,
"step": 1387
},
{
"epoch": 1.38,
"learning_rate": 4.618887395390523e-06,
"loss": 0.383,
"step": 1388
},
{
"epoch": 1.38,
"learning_rate": 4.605308110806436e-06,
"loss": 0.4666,
"step": 1389
},
{
"epoch": 1.38,
"learning_rate": 4.591742842820379e-06,
"loss": 0.4357,
"step": 1390
},
{
"epoch": 1.38,
"learning_rate": 4.578191626677897e-06,
"loss": 0.4093,
"step": 1391
},
{
"epoch": 1.38,
"learning_rate": 4.564654497588047e-06,
"loss": 0.4491,
"step": 1392
},
{
"epoch": 1.39,
"learning_rate": 4.551131490723267e-06,
"loss": 0.4566,
"step": 1393
},
{
"epoch": 1.39,
"learning_rate": 4.537622641219309e-06,
"loss": 0.422,
"step": 1394
},
{
"epoch": 1.39,
"learning_rate": 4.5241279841751405e-06,
"loss": 0.4501,
"step": 1395
},
{
"epoch": 1.39,
"learning_rate": 4.510647554652854e-06,
"loss": 0.382,
"step": 1396
},
{
"epoch": 1.39,
"learning_rate": 4.497181387677574e-06,
"loss": 0.3973,
"step": 1397
},
{
"epoch": 1.39,
"learning_rate": 4.483729518237369e-06,
"loss": 0.4333,
"step": 1398
},
{
"epoch": 1.39,
"learning_rate": 4.47029198128316e-06,
"loss": 0.4235,
"step": 1399
},
{
"epoch": 1.39,
"learning_rate": 4.456868811728629e-06,
"loss": 0.3611,
"step": 1400
},
{
"epoch": 1.39,
"learning_rate": 4.443460044450125e-06,
"loss": 0.4176,
"step": 1401
},
{
"epoch": 1.39,
"learning_rate": 4.4300657142865835e-06,
"loss": 0.3964,
"step": 1402
},
{
"epoch": 1.4,
"learning_rate": 4.416685856039423e-06,
"loss": 0.4177,
"step": 1403
},
{
"epoch": 1.4,
"learning_rate": 4.403320504472463e-06,
"loss": 0.3893,
"step": 1404
},
{
"epoch": 1.4,
"learning_rate": 4.389969694311831e-06,
"loss": 0.4372,
"step": 1405
},
{
"epoch": 1.4,
"learning_rate": 4.3766334602458695e-06,
"loss": 0.4166,
"step": 1406
},
{
"epoch": 1.4,
"learning_rate": 4.3633118369250645e-06,
"loss": 0.4073,
"step": 1407
},
{
"epoch": 1.4,
"learning_rate": 4.350004858961917e-06,
"loss": 0.3875,
"step": 1408
},
{
"epoch": 1.4,
"learning_rate": 4.336712560930891e-06,
"loss": 0.4031,
"step": 1409
},
{
"epoch": 1.4,
"learning_rate": 4.323434977368306e-06,
"loss": 0.3649,
"step": 1410
},
{
"epoch": 1.4,
"learning_rate": 4.310172142772243e-06,
"loss": 0.4079,
"step": 1411
},
{
"epoch": 1.4,
"learning_rate": 4.296924091602478e-06,
"loss": 0.3782,
"step": 1412
},
{
"epoch": 1.41,
"learning_rate": 4.283690858280366e-06,
"loss": 0.4052,
"step": 1413
},
{
"epoch": 1.41,
"learning_rate": 4.270472477188755e-06,
"loss": 0.3942,
"step": 1414
},
{
"epoch": 1.41,
"learning_rate": 4.257268982671912e-06,
"loss": 0.4215,
"step": 1415
},
{
"epoch": 1.41,
"learning_rate": 4.244080409035431e-06,
"loss": 0.3743,
"step": 1416
},
{
"epoch": 1.41,
"learning_rate": 4.230906790546128e-06,
"loss": 0.4581,
"step": 1417
},
{
"epoch": 1.41,
"learning_rate": 4.217748161431969e-06,
"loss": 0.4189,
"step": 1418
},
{
"epoch": 1.41,
"learning_rate": 4.204604555881967e-06,
"loss": 0.4407,
"step": 1419
},
{
"epoch": 1.41,
"learning_rate": 4.191476008046103e-06,
"loss": 0.3669,
"step": 1420
},
{
"epoch": 1.41,
"learning_rate": 4.1783625520352435e-06,
"loss": 0.383,
"step": 1421
},
{
"epoch": 1.41,
"learning_rate": 4.165264221921033e-06,
"loss": 0.4078,
"step": 1422
},
{
"epoch": 1.42,
"learning_rate": 4.15218105173582e-06,
"loss": 0.4346,
"step": 1423
},
{
"epoch": 1.42,
"learning_rate": 4.139113075472565e-06,
"loss": 0.4068,
"step": 1424
},
{
"epoch": 1.42,
"learning_rate": 4.126060327084739e-06,
"loss": 0.3872,
"step": 1425
},
{
"epoch": 1.42,
"learning_rate": 4.113022840486268e-06,
"loss": 0.4293,
"step": 1426
},
{
"epoch": 1.42,
"learning_rate": 4.100000649551413e-06,
"loss": 0.4859,
"step": 1427
},
{
"epoch": 1.42,
"learning_rate": 4.086993788114694e-06,
"loss": 0.5384,
"step": 1428
},
{
"epoch": 1.42,
"learning_rate": 4.074002289970801e-06,
"loss": 0.3733,
"step": 1429
},
{
"epoch": 1.42,
"learning_rate": 4.061026188874509e-06,
"loss": 0.4293,
"step": 1430
},
{
"epoch": 1.42,
"learning_rate": 4.048065518540589e-06,
"loss": 0.3839,
"step": 1431
},
{
"epoch": 1.42,
"learning_rate": 4.035120312643718e-06,
"loss": 0.4572,
"step": 1432
},
{
"epoch": 1.43,
"learning_rate": 4.02219060481839e-06,
"loss": 0.5494,
"step": 1433
},
{
"epoch": 1.43,
"learning_rate": 4.009276428658836e-06,
"loss": 0.4731,
"step": 1434
},
{
"epoch": 1.43,
"learning_rate": 3.996377817718932e-06,
"loss": 0.4338,
"step": 1435
},
{
"epoch": 1.43,
"learning_rate": 3.983494805512109e-06,
"loss": 0.4206,
"step": 1436
},
{
"epoch": 1.43,
"learning_rate": 3.970627425511272e-06,
"loss": 0.4375,
"step": 1437
},
{
"epoch": 1.43,
"learning_rate": 3.9577757111487095e-06,
"loss": 0.4494,
"step": 1438
},
{
"epoch": 1.43,
"learning_rate": 3.944939695816005e-06,
"loss": 0.4143,
"step": 1439
},
{
"epoch": 1.43,
"learning_rate": 3.932119412863952e-06,
"loss": 0.4882,
"step": 1440
},
{
"epoch": 1.43,
"learning_rate": 3.9193148956024795e-06,
"loss": 0.4838,
"step": 1441
},
{
"epoch": 1.43,
"learning_rate": 3.906526177300536e-06,
"loss": 0.4216,
"step": 1442
},
{
"epoch": 1.44,
"learning_rate": 3.893753291186031e-06,
"loss": 0.4707,
"step": 1443
},
{
"epoch": 1.44,
"learning_rate": 3.8809962704457375e-06,
"loss": 0.378,
"step": 1444
},
{
"epoch": 1.44,
"learning_rate": 3.868255148225199e-06,
"loss": 0.4112,
"step": 1445
},
{
"epoch": 1.44,
"learning_rate": 3.855529957628671e-06,
"loss": 0.4056,
"step": 1446
},
{
"epoch": 1.44,
"learning_rate": 3.842820731718997e-06,
"loss": 0.4148,
"step": 1447
},
{
"epoch": 1.44,
"learning_rate": 3.830127503517541e-06,
"loss": 0.3965,
"step": 1448
},
{
"epoch": 1.44,
"learning_rate": 3.81745030600411e-06,
"loss": 0.4471,
"step": 1449
},
{
"epoch": 1.44,
"learning_rate": 3.8047891721168517e-06,
"loss": 0.4257,
"step": 1450
},
{
"epoch": 1.44,
"learning_rate": 3.7921441347521893e-06,
"loss": 0.351,
"step": 1451
},
{
"epoch": 1.44,
"learning_rate": 3.779515226764714e-06,
"loss": 0.4552,
"step": 1452
},
{
"epoch": 1.45,
"learning_rate": 3.766902480967106e-06,
"loss": 0.4194,
"step": 1453
},
{
"epoch": 1.45,
"learning_rate": 3.75430593013006e-06,
"loss": 0.4381,
"step": 1454
},
{
"epoch": 1.45,
"learning_rate": 3.7417256069821872e-06,
"loss": 0.4185,
"step": 1455
},
{
"epoch": 1.45,
"learning_rate": 3.729161544209945e-06,
"loss": 0.4467,
"step": 1456
},
{
"epoch": 1.45,
"learning_rate": 3.7166137744575324e-06,
"loss": 0.3922,
"step": 1457
},
{
"epoch": 1.45,
"learning_rate": 3.704082330326826e-06,
"loss": 0.4088,
"step": 1458
},
{
"epoch": 1.45,
"learning_rate": 3.6915672443772644e-06,
"loss": 0.4264,
"step": 1459
},
{
"epoch": 1.45,
"learning_rate": 3.6790685491258104e-06,
"loss": 0.4519,
"step": 1460
},
{
"epoch": 1.45,
"learning_rate": 3.666586277046825e-06,
"loss": 0.3527,
"step": 1461
},
{
"epoch": 1.45,
"learning_rate": 3.6541204605719992e-06,
"loss": 0.4036,
"step": 1462
},
{
"epoch": 1.45,
"learning_rate": 3.6416711320902722e-06,
"loss": 0.4331,
"step": 1463
},
{
"epoch": 1.46,
"learning_rate": 3.62923832394774e-06,
"loss": 0.4224,
"step": 1464
},
{
"epoch": 1.46,
"learning_rate": 3.616822068447581e-06,
"loss": 0.3773,
"step": 1465
},
{
"epoch": 1.46,
"learning_rate": 3.604422397849958e-06,
"loss": 0.3991,
"step": 1466
},
{
"epoch": 1.46,
"learning_rate": 3.592039344371949e-06,
"loss": 0.4457,
"step": 1467
},
{
"epoch": 1.46,
"learning_rate": 3.579672940187455e-06,
"loss": 0.4314,
"step": 1468
},
{
"epoch": 1.46,
"learning_rate": 3.56732321742712e-06,
"loss": 0.4106,
"step": 1469
},
{
"epoch": 1.46,
"learning_rate": 3.554990208178242e-06,
"loss": 0.4725,
"step": 1470
},
{
"epoch": 1.46,
"learning_rate": 3.5426739444846967e-06,
"loss": 0.4547,
"step": 1471
},
{
"epoch": 1.46,
"learning_rate": 3.5303744583468515e-06,
"loss": 0.4111,
"step": 1472
},
{
"epoch": 1.46,
"learning_rate": 3.5180917817214798e-06,
"loss": 0.4242,
"step": 1473
},
{
"epoch": 1.47,
"learning_rate": 3.5058259465216828e-06,
"loss": 0.4458,
"step": 1474
},
{
"epoch": 1.47,
"learning_rate": 3.493576984616801e-06,
"loss": 0.4277,
"step": 1475
},
{
"epoch": 1.47,
"learning_rate": 3.4813449278323374e-06,
"loss": 0.4247,
"step": 1476
},
{
"epoch": 1.47,
"learning_rate": 3.46912980794987e-06,
"loss": 0.4239,
"step": 1477
},
{
"epoch": 1.47,
"learning_rate": 3.456931656706972e-06,
"loss": 0.3874,
"step": 1478
},
{
"epoch": 1.47,
"learning_rate": 3.444750505797123e-06,
"loss": 0.3985,
"step": 1479
},
{
"epoch": 1.47,
"learning_rate": 3.4325863868696453e-06,
"loss": 0.3705,
"step": 1480
},
{
"epoch": 1.47,
"learning_rate": 3.420439331529597e-06,
"loss": 0.4043,
"step": 1481
},
{
"epoch": 1.47,
"learning_rate": 3.408309371337699e-06,
"loss": 0.4601,
"step": 1482
},
{
"epoch": 1.47,
"learning_rate": 3.3961965378102635e-06,
"loss": 0.4226,
"step": 1483
},
{
"epoch": 1.48,
"learning_rate": 3.384100862419096e-06,
"loss": 0.4397,
"step": 1484
},
{
"epoch": 1.48,
"learning_rate": 3.372022376591435e-06,
"loss": 0.4365,
"step": 1485
},
{
"epoch": 1.48,
"learning_rate": 3.3599611117098463e-06,
"loss": 0.4348,
"step": 1486
},
{
"epoch": 1.48,
"learning_rate": 3.3479170991121455e-06,
"loss": 0.4159,
"step": 1487
},
{
"epoch": 1.48,
"learning_rate": 3.3358903700913357e-06,
"loss": 0.4254,
"step": 1488
},
{
"epoch": 1.48,
"learning_rate": 3.3238809558955054e-06,
"loss": 0.4367,
"step": 1489
},
{
"epoch": 1.48,
"learning_rate": 3.311888887727763e-06,
"loss": 0.3994,
"step": 1490
},
{
"epoch": 1.48,
"learning_rate": 3.2999141967461435e-06,
"loss": 0.3789,
"step": 1491
},
{
"epoch": 1.48,
"learning_rate": 3.2879569140635324e-06,
"loss": 0.4485,
"step": 1492
},
{
"epoch": 1.48,
"learning_rate": 3.276017070747579e-06,
"loss": 0.46,
"step": 1493
},
{
"epoch": 1.49,
"learning_rate": 3.2640946978206266e-06,
"loss": 0.403,
"step": 1494
},
{
"epoch": 1.49,
"learning_rate": 3.252189826259634e-06,
"loss": 0.4251,
"step": 1495
},
{
"epoch": 1.49,
"learning_rate": 3.2403024869960765e-06,
"loss": 0.423,
"step": 1496
},
{
"epoch": 1.49,
"learning_rate": 3.22843271091588e-06,
"loss": 0.4406,
"step": 1497
},
{
"epoch": 1.49,
"learning_rate": 3.2165805288593377e-06,
"loss": 0.4234,
"step": 1498
},
{
"epoch": 1.49,
"learning_rate": 3.2047459716210306e-06,
"loss": 0.408,
"step": 1499
},
{
"epoch": 1.49,
"learning_rate": 3.192929069949744e-06,
"loss": 0.424,
"step": 1500
},
{
"epoch": 1.49,
"learning_rate": 3.1811298545483937e-06,
"loss": 0.3563,
"step": 1501
},
{
"epoch": 1.49,
"learning_rate": 3.16934835607394e-06,
"loss": 0.4295,
"step": 1502
},
{
"epoch": 1.49,
"learning_rate": 3.1575846051373117e-06,
"loss": 0.4023,
"step": 1503
},
{
"epoch": 1.5,
"learning_rate": 3.145838632303325e-06,
"loss": 0.4371,
"step": 1504
},
{
"epoch": 1.5,
"learning_rate": 3.1341104680906055e-06,
"loss": 0.4269,
"step": 1505
},
{
"epoch": 1.5,
"learning_rate": 3.122400142971507e-06,
"loss": 0.3805,
"step": 1506
},
{
"epoch": 1.5,
"learning_rate": 3.1107076873720344e-06,
"loss": 0.3622,
"step": 1507
},
{
"epoch": 1.5,
"learning_rate": 3.0990331316717635e-06,
"loss": 0.3704,
"step": 1508
},
{
"epoch": 1.5,
"learning_rate": 3.087376506203763e-06,
"loss": 0.3553,
"step": 1509
},
{
"epoch": 1.5,
"learning_rate": 3.0757378412545114e-06,
"loss": 0.3825,
"step": 1510
},
{
"epoch": 1.5,
"learning_rate": 3.064117167063827e-06,
"loss": 0.4002,
"step": 1511
},
{
"epoch": 1.5,
"learning_rate": 3.0525145138247793e-06,
"loss": 0.4351,
"step": 1512
},
{
"epoch": 1.5,
"learning_rate": 3.040929911683619e-06,
"loss": 0.4098,
"step": 1513
},
{
"epoch": 1.51,
"learning_rate": 3.0293633907396903e-06,
"loss": 0.4543,
"step": 1514
},
{
"epoch": 1.51,
"learning_rate": 3.017814981045374e-06,
"loss": 0.4124,
"step": 1515
},
{
"epoch": 1.51,
"learning_rate": 3.006284712605971e-06,
"loss": 0.4123,
"step": 1516
},
{
"epoch": 1.51,
"learning_rate": 2.994772615379665e-06,
"loss": 0.4105,
"step": 1517
},
{
"epoch": 1.51,
"learning_rate": 2.983278719277418e-06,
"loss": 0.3972,
"step": 1518
},
{
"epoch": 1.51,
"learning_rate": 2.971803054162903e-06,
"loss": 0.4142,
"step": 1519
},
{
"epoch": 1.51,
"learning_rate": 2.9603456498524342e-06,
"loss": 0.4307,
"step": 1520
},
{
"epoch": 1.51,
"learning_rate": 2.948906536114864e-06,
"loss": 0.3872,
"step": 1521
},
{
"epoch": 1.51,
"learning_rate": 2.937485742671532e-06,
"loss": 0.3959,
"step": 1522
},
{
"epoch": 1.51,
"learning_rate": 2.926083299196174e-06,
"loss": 0.465,
"step": 1523
},
{
"epoch": 1.52,
"learning_rate": 2.914699235314855e-06,
"loss": 0.3882,
"step": 1524
},
{
"epoch": 1.52,
"learning_rate": 2.903333580605878e-06,
"loss": 0.4345,
"step": 1525
},
{
"epoch": 1.52,
"learning_rate": 2.8919863645997227e-06,
"loss": 0.3806,
"step": 1526
},
{
"epoch": 1.52,
"learning_rate": 2.880657616778948e-06,
"loss": 0.4074,
"step": 1527
},
{
"epoch": 1.52,
"learning_rate": 2.8693473665781367e-06,
"loss": 0.3848,
"step": 1528
},
{
"epoch": 1.52,
"learning_rate": 2.858055643383818e-06,
"loss": 0.4056,
"step": 1529
},
{
"epoch": 1.52,
"learning_rate": 2.846782476534373e-06,
"loss": 0.411,
"step": 1530
},
{
"epoch": 1.52,
"learning_rate": 2.835527895319973e-06,
"loss": 0.4615,
"step": 1531
},
{
"epoch": 1.52,
"learning_rate": 2.824291928982501e-06,
"loss": 0.4257,
"step": 1532
},
{
"epoch": 1.52,
"learning_rate": 2.813074606715465e-06,
"loss": 0.3549,
"step": 1533
},
{
"epoch": 1.53,
"learning_rate": 2.8018759576639478e-06,
"loss": 0.429,
"step": 1534
},
{
"epoch": 1.53,
"learning_rate": 2.790696010924505e-06,
"loss": 0.394,
"step": 1535
},
{
"epoch": 1.53,
"learning_rate": 2.7795347955451004e-06,
"loss": 0.3725,
"step": 1536
},
{
"epoch": 1.53,
"learning_rate": 2.7683923405250315e-06,
"loss": 0.4298,
"step": 1537
},
{
"epoch": 1.53,
"learning_rate": 2.757268674814849e-06,
"loss": 0.4425,
"step": 1538
},
{
"epoch": 1.53,
"learning_rate": 2.7461638273162895e-06,
"loss": 0.4309,
"step": 1539
},
{
"epoch": 1.53,
"learning_rate": 2.735077826882192e-06,
"loss": 0.3683,
"step": 1540
},
{
"epoch": 1.53,
"learning_rate": 2.724010702316429e-06,
"loss": 0.4057,
"step": 1541
},
{
"epoch": 1.53,
"learning_rate": 2.7129624823738267e-06,
"loss": 0.4663,
"step": 1542
},
{
"epoch": 1.53,
"learning_rate": 2.7019331957600958e-06,
"loss": 0.4449,
"step": 1543
},
{
"epoch": 1.54,
"learning_rate": 2.6909228711317526e-06,
"loss": 0.4235,
"step": 1544
},
{
"epoch": 1.54,
"learning_rate": 2.6799315370960454e-06,
"loss": 0.3336,
"step": 1545
},
{
"epoch": 1.54,
"learning_rate": 2.6689592222108827e-06,
"loss": 0.3715,
"step": 1546
},
{
"epoch": 1.54,
"learning_rate": 2.6580059549847546e-06,
"loss": 0.4626,
"step": 1547
},
{
"epoch": 1.54,
"learning_rate": 2.6470717638766607e-06,
"loss": 0.3586,
"step": 1548
},
{
"epoch": 1.54,
"learning_rate": 2.6361566772960466e-06,
"loss": 0.3947,
"step": 1549
},
{
"epoch": 1.54,
"learning_rate": 2.625260723602703e-06,
"loss": 0.4712,
"step": 1550
},
{
"epoch": 1.54,
"learning_rate": 2.614383931106722e-06,
"loss": 0.4215,
"step": 1551
},
{
"epoch": 1.54,
"learning_rate": 2.6035263280684055e-06,
"loss": 0.4123,
"step": 1552
},
{
"epoch": 1.54,
"learning_rate": 2.5926879426981943e-06,
"loss": 0.4329,
"step": 1553
},
{
"epoch": 1.55,
"learning_rate": 2.5818688031566132e-06,
"loss": 0.4774,
"step": 1554
},
{
"epoch": 1.55,
"learning_rate": 2.5710689375541596e-06,
"loss": 0.4757,
"step": 1555
},
{
"epoch": 1.55,
"learning_rate": 2.5602883739512675e-06,
"loss": 0.399,
"step": 1556
},
{
"epoch": 1.55,
"learning_rate": 2.5495271403582146e-06,
"loss": 0.4433,
"step": 1557
},
{
"epoch": 1.55,
"learning_rate": 2.5387852647350553e-06,
"loss": 0.4027,
"step": 1558
},
{
"epoch": 1.55,
"learning_rate": 2.5280627749915544e-06,
"loss": 0.4577,
"step": 1559
},
{
"epoch": 1.55,
"learning_rate": 2.517359698987102e-06,
"loss": 0.4261,
"step": 1560
},
{
"epoch": 1.55,
"learning_rate": 2.506676064530641e-06,
"loss": 0.3354,
"step": 1561
},
{
"epoch": 1.55,
"learning_rate": 2.496011899380609e-06,
"loss": 0.4003,
"step": 1562
},
{
"epoch": 1.55,
"learning_rate": 2.485367231244863e-06,
"loss": 0.4171,
"step": 1563
},
{
"epoch": 1.56,
"learning_rate": 2.4747420877805905e-06,
"loss": 0.4299,
"step": 1564
},
{
"epoch": 1.56,
"learning_rate": 2.4641364965942572e-06,
"loss": 0.4129,
"step": 1565
},
{
"epoch": 1.56,
"learning_rate": 2.453550485241526e-06,
"loss": 0.4238,
"step": 1566
},
{
"epoch": 1.56,
"learning_rate": 2.442984081227181e-06,
"loss": 0.3878,
"step": 1567
},
{
"epoch": 1.56,
"learning_rate": 2.4324373120050738e-06,
"loss": 0.4472,
"step": 1568
},
{
"epoch": 1.56,
"learning_rate": 2.421910204978033e-06,
"loss": 0.398,
"step": 1569
},
{
"epoch": 1.56,
"learning_rate": 2.411402787497801e-06,
"loss": 0.4246,
"step": 1570
},
{
"epoch": 1.56,
"learning_rate": 2.400915086864967e-06,
"loss": 0.4614,
"step": 1571
},
{
"epoch": 1.56,
"learning_rate": 2.390447130328878e-06,
"loss": 0.4215,
"step": 1572
},
{
"epoch": 1.56,
"learning_rate": 2.3799989450876005e-06,
"loss": 0.3916,
"step": 1573
},
{
"epoch": 1.57,
"learning_rate": 2.369570558287819e-06,
"loss": 0.4531,
"step": 1574
},
{
"epoch": 1.57,
"learning_rate": 2.3591619970247803e-06,
"loss": 0.4677,
"step": 1575
},
{
"epoch": 1.57,
"learning_rate": 2.3487732883422186e-06,
"loss": 0.4801,
"step": 1576
},
{
"epoch": 1.57,
"learning_rate": 2.3384044592322875e-06,
"loss": 0.426,
"step": 1577
},
{
"epoch": 1.57,
"learning_rate": 2.3280555366354906e-06,
"loss": 0.4049,
"step": 1578
},
{
"epoch": 1.57,
"learning_rate": 2.3177265474406084e-06,
"loss": 0.4584,
"step": 1579
},
{
"epoch": 1.57,
"learning_rate": 2.3074175184846303e-06,
"loss": 0.4135,
"step": 1580
},
{
"epoch": 1.57,
"learning_rate": 2.2971284765526847e-06,
"loss": 0.4577,
"step": 1581
},
{
"epoch": 1.57,
"learning_rate": 2.286859448377966e-06,
"loss": 0.4299,
"step": 1582
},
{
"epoch": 1.57,
"learning_rate": 2.276610460641682e-06,
"loss": 0.42,
"step": 1583
},
{
"epoch": 1.58,
"learning_rate": 2.2663815399729495e-06,
"loss": 0.3987,
"step": 1584
},
{
"epoch": 1.58,
"learning_rate": 2.2561727129487622e-06,
"loss": 0.4673,
"step": 1585
},
{
"epoch": 1.58,
"learning_rate": 2.245984006093902e-06,
"loss": 0.4219,
"step": 1586
},
{
"epoch": 1.58,
"learning_rate": 2.2358154458808713e-06,
"loss": 0.4034,
"step": 1587
},
{
"epoch": 1.58,
"learning_rate": 2.2256670587298378e-06,
"loss": 0.43,
"step": 1588
},
{
"epoch": 1.58,
"learning_rate": 2.215538871008538e-06,
"loss": 0.4309,
"step": 1589
},
{
"epoch": 1.58,
"learning_rate": 2.205430909032239e-06,
"loss": 0.3674,
"step": 1590
},
{
"epoch": 1.58,
"learning_rate": 2.195343199063653e-06,
"loss": 0.3717,
"step": 1591
},
{
"epoch": 1.58,
"learning_rate": 2.185275767312869e-06,
"loss": 0.4351,
"step": 1592
},
{
"epoch": 1.58,
"learning_rate": 2.175228639937299e-06,
"loss": 0.3968,
"step": 1593
},
{
"epoch": 1.59,
"learning_rate": 2.1652018430415923e-06,
"loss": 0.4552,
"step": 1594
},
{
"epoch": 1.59,
"learning_rate": 2.1551954026775723e-06,
"loss": 0.4301,
"step": 1595
},
{
"epoch": 1.59,
"learning_rate": 2.145209344844177e-06,
"loss": 0.4121,
"step": 1596
},
{
"epoch": 1.59,
"learning_rate": 2.1352436954873825e-06,
"loss": 0.4006,
"step": 1597
},
{
"epoch": 1.59,
"learning_rate": 2.1252984805001465e-06,
"loss": 0.4343,
"step": 1598
},
{
"epoch": 1.59,
"learning_rate": 2.115373725722326e-06,
"loss": 0.4547,
"step": 1599
},
{
"epoch": 1.59,
"learning_rate": 2.1054694569406243e-06,
"loss": 0.4235,
"step": 1600
},
{
"epoch": 1.59,
"learning_rate": 2.095585699888504e-06,
"loss": 0.4522,
"step": 1601
},
{
"epoch": 1.59,
"learning_rate": 2.0857224802461516e-06,
"loss": 0.3813,
"step": 1602
},
{
"epoch": 1.59,
"learning_rate": 2.075879823640382e-06,
"loss": 0.3938,
"step": 1603
},
{
"epoch": 1.6,
"learning_rate": 2.066057755644587e-06,
"loss": 0.4117,
"step": 1604
},
{
"epoch": 1.6,
"learning_rate": 2.056256301778664e-06,
"loss": 0.447,
"step": 1605
},
{
"epoch": 1.6,
"learning_rate": 2.046475487508943e-06,
"loss": 0.4229,
"step": 1606
},
{
"epoch": 1.6,
"learning_rate": 2.0367153382481407e-06,
"loss": 0.3651,
"step": 1607
},
{
"epoch": 1.6,
"learning_rate": 2.026975879355273e-06,
"loss": 0.4046,
"step": 1608
},
{
"epoch": 1.6,
"learning_rate": 2.0172571361356007e-06,
"loss": 0.368,
"step": 1609
},
{
"epoch": 1.6,
"learning_rate": 2.0075591338405586e-06,
"loss": 0.4077,
"step": 1610
},
{
"epoch": 1.6,
"learning_rate": 1.997881897667695e-06,
"loss": 0.4161,
"step": 1611
},
{
"epoch": 1.6,
"learning_rate": 1.9882254527605995e-06,
"loss": 0.4501,
"step": 1612
},
{
"epoch": 1.6,
"learning_rate": 1.978589824208843e-06,
"loss": 0.4884,
"step": 1613
},
{
"epoch": 1.61,
"learning_rate": 1.9689750370479134e-06,
"loss": 0.4173,
"step": 1614
},
{
"epoch": 1.61,
"learning_rate": 1.9593811162591446e-06,
"loss": 0.3664,
"step": 1615
},
{
"epoch": 1.61,
"learning_rate": 1.9498080867696568e-06,
"loss": 0.416,
"step": 1616
},
{
"epoch": 1.61,
"learning_rate": 1.9402559734522895e-06,
"loss": 0.4258,
"step": 1617
},
{
"epoch": 1.61,
"learning_rate": 1.930724801125539e-06,
"loss": 0.3957,
"step": 1618
},
{
"epoch": 1.61,
"learning_rate": 1.921214594553488e-06,
"loss": 0.4217,
"step": 1619
},
{
"epoch": 1.61,
"learning_rate": 1.91172537844575e-06,
"loss": 0.3908,
"step": 1620
},
{
"epoch": 1.61,
"learning_rate": 1.9022571774573995e-06,
"loss": 0.4552,
"step": 1621
},
{
"epoch": 1.61,
"learning_rate": 1.8928100161889062e-06,
"loss": 0.393,
"step": 1622
},
{
"epoch": 1.61,
"learning_rate": 1.8833839191860803e-06,
"loss": 0.4295,
"step": 1623
},
{
"epoch": 1.62,
"learning_rate": 1.8739789109399954e-06,
"loss": 0.3818,
"step": 1624
},
{
"epoch": 1.62,
"learning_rate": 1.8645950158869353e-06,
"loss": 0.4454,
"step": 1625
},
{
"epoch": 1.62,
"learning_rate": 1.8552322584083249e-06,
"loss": 0.4449,
"step": 1626
},
{
"epoch": 1.62,
"learning_rate": 1.845890662830675e-06,
"loss": 0.4192,
"step": 1627
},
{
"epoch": 1.62,
"learning_rate": 1.8365702534255103e-06,
"loss": 0.3922,
"step": 1628
},
{
"epoch": 1.62,
"learning_rate": 1.8272710544093019e-06,
"loss": 0.3795,
"step": 1629
},
{
"epoch": 1.62,
"learning_rate": 1.8179930899434207e-06,
"loss": 0.4471,
"step": 1630
},
{
"epoch": 1.62,
"learning_rate": 1.8087363841340588e-06,
"loss": 0.3972,
"step": 1631
},
{
"epoch": 1.62,
"learning_rate": 1.7995009610321833e-06,
"loss": 0.4345,
"step": 1632
},
{
"epoch": 1.62,
"learning_rate": 1.7902868446334555e-06,
"loss": 0.4326,
"step": 1633
},
{
"epoch": 1.63,
"learning_rate": 1.7810940588781811e-06,
"loss": 0.4103,
"step": 1634
},
{
"epoch": 1.63,
"learning_rate": 1.771922627651238e-06,
"loss": 0.4264,
"step": 1635
},
{
"epoch": 1.63,
"learning_rate": 1.762772574782027e-06,
"loss": 0.4327,
"step": 1636
},
{
"epoch": 1.63,
"learning_rate": 1.7536439240444037e-06,
"loss": 0.4268,
"step": 1637
},
{
"epoch": 1.63,
"learning_rate": 1.7445366991566126e-06,
"loss": 0.3864,
"step": 1638
},
{
"epoch": 1.63,
"learning_rate": 1.7354509237812334e-06,
"loss": 0.4465,
"step": 1639
},
{
"epoch": 1.63,
"learning_rate": 1.7263866215251034e-06,
"loss": 0.4102,
"step": 1640
},
{
"epoch": 1.63,
"learning_rate": 1.7173438159392863e-06,
"loss": 0.4742,
"step": 1641
},
{
"epoch": 1.63,
"learning_rate": 1.7083225305189777e-06,
"loss": 0.4043,
"step": 1642
},
{
"epoch": 1.63,
"learning_rate": 1.699322788703468e-06,
"loss": 0.3951,
"step": 1643
},
{
"epoch": 1.64,
"learning_rate": 1.690344613876066e-06,
"loss": 0.3938,
"step": 1644
},
{
"epoch": 1.64,
"learning_rate": 1.6813880293640505e-06,
"loss": 0.3878,
"step": 1645
},
{
"epoch": 1.64,
"learning_rate": 1.672453058438599e-06,
"loss": 0.393,
"step": 1646
},
{
"epoch": 1.64,
"learning_rate": 1.6635397243147366e-06,
"loss": 0.5156,
"step": 1647
},
{
"epoch": 1.64,
"learning_rate": 1.6546480501512674e-06,
"loss": 0.4314,
"step": 1648
},
{
"epoch": 1.64,
"learning_rate": 1.64577805905072e-06,
"loss": 0.3952,
"step": 1649
},
{
"epoch": 1.64,
"learning_rate": 1.6369297740592872e-06,
"loss": 0.4036,
"step": 1650
},
{
"epoch": 1.64,
"learning_rate": 1.62810321816676e-06,
"loss": 0.3635,
"step": 1651
},
{
"epoch": 1.64,
"learning_rate": 1.6192984143064771e-06,
"loss": 0.4383,
"step": 1652
},
{
"epoch": 1.64,
"learning_rate": 1.610515385355258e-06,
"loss": 0.4462,
"step": 1653
},
{
"epoch": 1.64,
"learning_rate": 1.601754154133347e-06,
"loss": 0.3936,
"step": 1654
},
{
"epoch": 1.65,
"learning_rate": 1.593014743404353e-06,
"loss": 0.4637,
"step": 1655
},
{
"epoch": 1.65,
"learning_rate": 1.5842971758751913e-06,
"loss": 0.4519,
"step": 1656
},
{
"epoch": 1.65,
"learning_rate": 1.5756014741960213e-06,
"loss": 0.4327,
"step": 1657
},
{
"epoch": 1.65,
"learning_rate": 1.5669276609601925e-06,
"loss": 0.3949,
"step": 1658
},
{
"epoch": 1.65,
"learning_rate": 1.558275758704183e-06,
"loss": 0.4259,
"step": 1659
},
{
"epoch": 1.65,
"learning_rate": 1.54964578990754e-06,
"loss": 0.4188,
"step": 1660
},
{
"epoch": 1.65,
"learning_rate": 1.541037776992822e-06,
"loss": 0.418,
"step": 1661
},
{
"epoch": 1.65,
"learning_rate": 1.5324517423255503e-06,
"loss": 0.4514,
"step": 1662
},
{
"epoch": 1.65,
"learning_rate": 1.5238877082141268e-06,
"loss": 0.4014,
"step": 1663
},
{
"epoch": 1.65,
"learning_rate": 1.5153456969098013e-06,
"loss": 0.3741,
"step": 1664
},
{
"epoch": 1.66,
"learning_rate": 1.5068257306065991e-06,
"loss": 0.4268,
"step": 1665
},
{
"epoch": 1.66,
"learning_rate": 1.498327831441274e-06,
"loss": 0.4001,
"step": 1666
},
{
"epoch": 1.66,
"learning_rate": 1.4898520214932388e-06,
"loss": 0.445,
"step": 1667
},
{
"epoch": 1.66,
"learning_rate": 1.4813983227845164e-06,
"loss": 0.4074,
"step": 1668
},
{
"epoch": 1.66,
"learning_rate": 1.4729667572796735e-06,
"loss": 0.4337,
"step": 1669
},
{
"epoch": 1.66,
"learning_rate": 1.4645573468857754e-06,
"loss": 0.412,
"step": 1670
},
{
"epoch": 1.66,
"learning_rate": 1.4561701134523288e-06,
"loss": 0.3682,
"step": 1671
},
{
"epoch": 1.66,
"learning_rate": 1.4478050787712094e-06,
"loss": 0.4196,
"step": 1672
},
{
"epoch": 1.66,
"learning_rate": 1.4394622645766232e-06,
"loss": 0.4367,
"step": 1673
},
{
"epoch": 1.66,
"learning_rate": 1.431141692545036e-06,
"loss": 0.4157,
"step": 1674
},
{
"epoch": 1.67,
"learning_rate": 1.4228433842951251e-06,
"loss": 0.4,
"step": 1675
},
{
"epoch": 1.67,
"learning_rate": 1.4145673613877298e-06,
"loss": 0.4462,
"step": 1676
},
{
"epoch": 1.67,
"learning_rate": 1.4063136453257787e-06,
"loss": 0.4257,
"step": 1677
},
{
"epoch": 1.67,
"learning_rate": 1.398082257554243e-06,
"loss": 0.4134,
"step": 1678
},
{
"epoch": 1.67,
"learning_rate": 1.389873219460085e-06,
"loss": 0.3996,
"step": 1679
},
{
"epoch": 1.67,
"learning_rate": 1.3816865523721867e-06,
"loss": 0.3794,
"step": 1680
},
{
"epoch": 1.67,
"learning_rate": 1.373522277561321e-06,
"loss": 0.4278,
"step": 1681
},
{
"epoch": 1.67,
"learning_rate": 1.3653804162400686e-06,
"loss": 0.3806,
"step": 1682
},
{
"epoch": 1.67,
"learning_rate": 1.3572609895627786e-06,
"loss": 0.393,
"step": 1683
},
{
"epoch": 1.67,
"learning_rate": 1.349164018625513e-06,
"loss": 0.4187,
"step": 1684
},
{
"epoch": 1.68,
"learning_rate": 1.3410895244659828e-06,
"loss": 0.4172,
"step": 1685
},
{
"epoch": 1.68,
"learning_rate": 1.3330375280635054e-06,
"loss": 0.4424,
"step": 1686
},
{
"epoch": 1.68,
"learning_rate": 1.3250080503389396e-06,
"loss": 0.4068,
"step": 1687
},
{
"epoch": 1.68,
"learning_rate": 1.3170011121546388e-06,
"loss": 0.4133,
"step": 1688
},
{
"epoch": 1.68,
"learning_rate": 1.3090167343143911e-06,
"loss": 0.4315,
"step": 1689
},
{
"epoch": 1.68,
"learning_rate": 1.3010549375633697e-06,
"loss": 0.4035,
"step": 1690
},
{
"epoch": 1.68,
"learning_rate": 1.2931157425880781e-06,
"loss": 0.4547,
"step": 1691
},
{
"epoch": 1.68,
"learning_rate": 1.2851991700162914e-06,
"loss": 0.3627,
"step": 1692
},
{
"epoch": 1.68,
"learning_rate": 1.2773052404170106e-06,
"loss": 0.4197,
"step": 1693
},
{
"epoch": 1.68,
"learning_rate": 1.2694339743004037e-06,
"loss": 0.3916,
"step": 1694
},
{
"epoch": 1.69,
"learning_rate": 1.2615853921177512e-06,
"loss": 0.3983,
"step": 1695
},
{
"epoch": 1.69,
"learning_rate": 1.2537595142614078e-06,
"loss": 0.36,
"step": 1696
},
{
"epoch": 1.69,
"learning_rate": 1.2459563610647186e-06,
"loss": 0.4108,
"step": 1697
},
{
"epoch": 1.69,
"learning_rate": 1.2381759528019988e-06,
"loss": 0.406,
"step": 1698
},
{
"epoch": 1.69,
"learning_rate": 1.2304183096884626e-06,
"loss": 0.4029,
"step": 1699
},
{
"epoch": 1.69,
"learning_rate": 1.2226834518801746e-06,
"loss": 0.4446,
"step": 1700
},
{
"epoch": 1.69,
"learning_rate": 1.214971399474002e-06,
"loss": 0.4296,
"step": 1701
},
{
"epoch": 1.69,
"learning_rate": 1.2072821725075567e-06,
"loss": 0.4039,
"step": 1702
},
{
"epoch": 1.69,
"learning_rate": 1.199615790959141e-06,
"loss": 0.4387,
"step": 1703
},
{
"epoch": 1.69,
"learning_rate": 1.1919722747477024e-06,
"loss": 0.4182,
"step": 1704
},
{
"epoch": 1.7,
"learning_rate": 1.184351643732784e-06,
"loss": 0.4049,
"step": 1705
},
{
"epoch": 1.7,
"learning_rate": 1.1767539177144616e-06,
"loss": 0.3915,
"step": 1706
},
{
"epoch": 1.7,
"learning_rate": 1.1691791164333054e-06,
"loss": 0.4263,
"step": 1707
},
{
"epoch": 1.7,
"learning_rate": 1.1616272595703114e-06,
"loss": 0.4193,
"step": 1708
},
{
"epoch": 1.7,
"learning_rate": 1.1540983667468686e-06,
"loss": 0.434,
"step": 1709
},
{
"epoch": 1.7,
"learning_rate": 1.1465924575247022e-06,
"loss": 0.3768,
"step": 1710
},
{
"epoch": 1.7,
"learning_rate": 1.1391095514058182e-06,
"loss": 0.4126,
"step": 1711
},
{
"epoch": 1.7,
"learning_rate": 1.131649667832453e-06,
"loss": 0.4245,
"step": 1712
},
{
"epoch": 1.7,
"learning_rate": 1.1242128261870311e-06,
"loss": 0.4378,
"step": 1713
},
{
"epoch": 1.7,
"learning_rate": 1.1167990457920985e-06,
"loss": 0.4006,
"step": 1714
},
{
"epoch": 1.71,
"learning_rate": 1.1094083459102966e-06,
"loss": 0.4273,
"step": 1715
},
{
"epoch": 1.71,
"learning_rate": 1.1020407457442905e-06,
"loss": 0.3455,
"step": 1716
},
{
"epoch": 1.71,
"learning_rate": 1.0946962644367265e-06,
"loss": 0.4709,
"step": 1717
},
{
"epoch": 1.71,
"learning_rate": 1.0873749210701868e-06,
"loss": 0.4943,
"step": 1718
},
{
"epoch": 1.71,
"learning_rate": 1.0800767346671347e-06,
"loss": 0.4557,
"step": 1719
},
{
"epoch": 1.71,
"learning_rate": 1.0728017241898648e-06,
"loss": 0.3414,
"step": 1720
},
{
"epoch": 1.71,
"learning_rate": 1.0655499085404587e-06,
"loss": 0.4201,
"step": 1721
},
{
"epoch": 1.71,
"learning_rate": 1.05832130656073e-06,
"loss": 0.4245,
"step": 1722
},
{
"epoch": 1.71,
"learning_rate": 1.0511159370321789e-06,
"loss": 0.421,
"step": 1723
},
{
"epoch": 1.71,
"learning_rate": 1.043933818675944e-06,
"loss": 0.4152,
"step": 1724
},
{
"epoch": 1.72,
"learning_rate": 1.0367749701527508e-06,
"loss": 0.4252,
"step": 1725
},
{
"epoch": 1.72,
"learning_rate": 1.0296394100628648e-06,
"loss": 0.4145,
"step": 1726
},
{
"epoch": 1.72,
"learning_rate": 1.0225271569460426e-06,
"loss": 0.4427,
"step": 1727
},
{
"epoch": 1.72,
"learning_rate": 1.0154382292814846e-06,
"loss": 0.4319,
"step": 1728
},
{
"epoch": 1.72,
"learning_rate": 1.008372645487785e-06,
"loss": 0.4014,
"step": 1729
},
{
"epoch": 1.72,
"learning_rate": 1.0013304239228938e-06,
"loss": 0.4022,
"step": 1730
},
{
"epoch": 1.72,
"learning_rate": 9.943115828840477e-07,
"loss": 0.4164,
"step": 1731
},
{
"epoch": 1.72,
"learning_rate": 9.873161406077435e-07,
"loss": 0.4665,
"step": 1732
},
{
"epoch": 1.72,
"learning_rate": 9.803441152696824e-07,
"loss": 0.4893,
"step": 1733
},
{
"epoch": 1.72,
"learning_rate": 9.733955249847183e-07,
"loss": 0.5233,
"step": 1734
},
{
"epoch": 1.73,
"learning_rate": 9.66470387806826e-07,
"loss": 0.3745,
"step": 1735
},
{
"epoch": 1.73,
"learning_rate": 9.595687217290362e-07,
"loss": 0.399,
"step": 1736
},
{
"epoch": 1.73,
"learning_rate": 9.526905446833934e-07,
"loss": 0.4313,
"step": 1737
},
{
"epoch": 1.73,
"learning_rate": 9.458358745409202e-07,
"loss": 0.3941,
"step": 1738
},
{
"epoch": 1.73,
"learning_rate": 9.390047291115567e-07,
"loss": 0.5249,
"step": 1739
},
{
"epoch": 1.73,
"learning_rate": 9.321971261441287e-07,
"loss": 0.5003,
"step": 1740
},
{
"epoch": 1.73,
"learning_rate": 9.254130833262876e-07,
"loss": 0.3672,
"step": 1741
},
{
"epoch": 1.73,
"learning_rate": 9.186526182844669e-07,
"loss": 0.4503,
"step": 1742
},
{
"epoch": 1.73,
"learning_rate": 9.119157485838459e-07,
"loss": 0.4171,
"step": 1743
},
{
"epoch": 1.73,
"learning_rate": 9.052024917282987e-07,
"loss": 0.4422,
"step": 1744
},
{
"epoch": 1.74,
"learning_rate": 8.985128651603437e-07,
"loss": 0.4272,
"step": 1745
},
{
"epoch": 1.74,
"learning_rate": 8.918468862611051e-07,
"loss": 0.3973,
"step": 1746
},
{
"epoch": 1.74,
"learning_rate": 8.852045723502667e-07,
"loss": 0.4454,
"step": 1747
},
{
"epoch": 1.74,
"learning_rate": 8.785859406860176e-07,
"loss": 0.3968,
"step": 1748
},
{
"epoch": 1.74,
"learning_rate": 8.719910084650262e-07,
"loss": 0.3769,
"step": 1749
},
{
"epoch": 1.74,
"learning_rate": 8.654197928223773e-07,
"loss": 0.4152,
"step": 1750
},
{
"epoch": 1.74,
"learning_rate": 8.588723108315377e-07,
"loss": 0.4385,
"step": 1751
},
{
"epoch": 1.74,
"learning_rate": 8.523485795043073e-07,
"loss": 0.3897,
"step": 1752
},
{
"epoch": 1.74,
"learning_rate": 8.458486157907786e-07,
"loss": 0.4034,
"step": 1753
},
{
"epoch": 1.74,
"learning_rate": 8.393724365792866e-07,
"loss": 0.4438,
"step": 1754
},
{
"epoch": 1.75,
"learning_rate": 8.329200586963748e-07,
"loss": 0.4125,
"step": 1755
},
{
"epoch": 1.75,
"learning_rate": 8.264914989067407e-07,
"loss": 0.395,
"step": 1756
},
{
"epoch": 1.75,
"learning_rate": 8.20086773913199e-07,
"loss": 0.4147,
"step": 1757
},
{
"epoch": 1.75,
"learning_rate": 8.137059003566372e-07,
"loss": 0.413,
"step": 1758
},
{
"epoch": 1.75,
"learning_rate": 8.073488948159691e-07,
"loss": 0.3923,
"step": 1759
},
{
"epoch": 1.75,
"learning_rate": 8.01015773808097e-07,
"loss": 0.427,
"step": 1760
},
{
"epoch": 1.75,
"learning_rate": 7.94706553787864e-07,
"loss": 0.4168,
"step": 1761
},
{
"epoch": 1.75,
"learning_rate": 7.884212511480139e-07,
"loss": 0.3666,
"step": 1762
},
{
"epoch": 1.75,
"learning_rate": 7.821598822191468e-07,
"loss": 0.4001,
"step": 1763
},
{
"epoch": 1.75,
"learning_rate": 7.759224632696793e-07,
"loss": 0.4285,
"step": 1764
},
{
"epoch": 1.76,
"learning_rate": 7.697090105057991e-07,
"loss": 0.4237,
"step": 1765
},
{
"epoch": 1.76,
"learning_rate": 7.635195400714279e-07,
"loss": 0.3727,
"step": 1766
},
{
"epoch": 1.76,
"learning_rate": 7.573540680481705e-07,
"loss": 0.3825,
"step": 1767
},
{
"epoch": 1.76,
"learning_rate": 7.512126104552809e-07,
"loss": 0.4473,
"step": 1768
},
{
"epoch": 1.76,
"learning_rate": 7.450951832496233e-07,
"loss": 0.3716,
"step": 1769
},
{
"epoch": 1.76,
"learning_rate": 7.390018023256196e-07,
"loss": 0.4208,
"step": 1770
},
{
"epoch": 1.76,
"learning_rate": 7.32932483515214e-07,
"loss": 0.414,
"step": 1771
},
{
"epoch": 1.76,
"learning_rate": 7.268872425878348e-07,
"loss": 0.4233,
"step": 1772
},
{
"epoch": 1.76,
"learning_rate": 7.208660952503488e-07,
"loss": 0.4298,
"step": 1773
},
{
"epoch": 1.76,
"learning_rate": 7.148690571470251e-07,
"loss": 0.3739,
"step": 1774
},
{
"epoch": 1.77,
"learning_rate": 7.088961438594922e-07,
"loss": 0.429,
"step": 1775
},
{
"epoch": 1.77,
"learning_rate": 7.029473709066892e-07,
"loss": 0.4304,
"step": 1776
},
{
"epoch": 1.77,
"learning_rate": 6.970227537448415e-07,
"loss": 0.4122,
"step": 1777
},
{
"epoch": 1.77,
"learning_rate": 6.911223077674079e-07,
"loss": 0.4515,
"step": 1778
},
{
"epoch": 1.77,
"learning_rate": 6.852460483050494e-07,
"loss": 0.4311,
"step": 1779
},
{
"epoch": 1.77,
"learning_rate": 6.793939906255831e-07,
"loss": 0.3644,
"step": 1780
},
{
"epoch": 1.77,
"learning_rate": 6.735661499339441e-07,
"loss": 0.4018,
"step": 1781
},
{
"epoch": 1.77,
"learning_rate": 6.677625413721433e-07,
"loss": 0.4104,
"step": 1782
},
{
"epoch": 1.77,
"learning_rate": 6.619831800192355e-07,
"loss": 0.4103,
"step": 1783
},
{
"epoch": 1.77,
"learning_rate": 6.562280808912768e-07,
"loss": 0.4216,
"step": 1784
},
{
"epoch": 1.78,
"learning_rate": 6.504972589412806e-07,
"loss": 0.4133,
"step": 1785
},
{
"epoch": 1.78,
"learning_rate": 6.447907290591859e-07,
"loss": 0.4507,
"step": 1786
},
{
"epoch": 1.78,
"learning_rate": 6.391085060718149e-07,
"loss": 0.3958,
"step": 1787
},
{
"epoch": 1.78,
"learning_rate": 6.334506047428346e-07,
"loss": 0.3868,
"step": 1788
},
{
"epoch": 1.78,
"learning_rate": 6.278170397727179e-07,
"loss": 0.4689,
"step": 1789
},
{
"epoch": 1.78,
"learning_rate": 6.222078257987085e-07,
"loss": 0.4259,
"step": 1790
},
{
"epoch": 1.78,
"learning_rate": 6.166229773947796e-07,
"loss": 0.404,
"step": 1791
},
{
"epoch": 1.78,
"learning_rate": 6.110625090715994e-07,
"loss": 0.3952,
"step": 1792
},
{
"epoch": 1.78,
"learning_rate": 6.055264352764878e-07,
"loss": 0.3922,
"step": 1793
},
{
"epoch": 1.78,
"learning_rate": 6.000147703933845e-07,
"loss": 0.409,
"step": 1794
},
{
"epoch": 1.79,
"learning_rate": 5.945275287428099e-07,
"loss": 0.4693,
"step": 1795
},
{
"epoch": 1.79,
"learning_rate": 5.890647245818259e-07,
"loss": 0.4072,
"step": 1796
},
{
"epoch": 1.79,
"learning_rate": 5.836263721040014e-07,
"loss": 0.4519,
"step": 1797
},
{
"epoch": 1.79,
"learning_rate": 5.782124854393745e-07,
"loss": 0.4268,
"step": 1798
},
{
"epoch": 1.79,
"learning_rate": 5.728230786544153e-07,
"loss": 0.406,
"step": 1799
},
{
"epoch": 1.79,
"learning_rate": 5.674581657519906e-07,
"loss": 0.3909,
"step": 1800
},
{
"epoch": 1.79,
"learning_rate": 5.621177606713257e-07,
"loss": 0.4146,
"step": 1801
},
{
"epoch": 1.79,
"learning_rate": 5.568018772879691e-07,
"loss": 0.429,
"step": 1802
},
{
"epoch": 1.79,
"learning_rate": 5.515105294137546e-07,
"loss": 0.3798,
"step": 1803
},
{
"epoch": 1.79,
"learning_rate": 5.46243730796776e-07,
"loss": 0.4169,
"step": 1804
},
{
"epoch": 1.8,
"learning_rate": 5.410014951213316e-07,
"loss": 0.4352,
"step": 1805
},
{
"epoch": 1.8,
"learning_rate": 5.357838360079059e-07,
"loss": 0.44,
"step": 1806
},
{
"epoch": 1.8,
"learning_rate": 5.305907670131249e-07,
"loss": 0.399,
"step": 1807
},
{
"epoch": 1.8,
"learning_rate": 5.254223016297289e-07,
"loss": 0.4008,
"step": 1808
},
{
"epoch": 1.8,
"learning_rate": 5.202784532865302e-07,
"loss": 0.4693,
"step": 1809
},
{
"epoch": 1.8,
"learning_rate": 5.15159235348377e-07,
"loss": 0.3937,
"step": 1810
},
{
"epoch": 1.8,
"learning_rate": 5.100646611161264e-07,
"loss": 0.4296,
"step": 1811
},
{
"epoch": 1.8,
"learning_rate": 5.049947438266023e-07,
"loss": 0.4094,
"step": 1812
},
{
"epoch": 1.8,
"learning_rate": 4.99949496652572e-07,
"loss": 0.4356,
"step": 1813
},
{
"epoch": 1.8,
"learning_rate": 4.949289327026952e-07,
"loss": 0.4738,
"step": 1814
},
{
"epoch": 1.81,
"learning_rate": 4.899330650215062e-07,
"loss": 0.4332,
"step": 1815
},
{
"epoch": 1.81,
"learning_rate": 4.849619065893673e-07,
"loss": 0.4374,
"step": 1816
},
{
"epoch": 1.81,
"learning_rate": 4.800154703224424e-07,
"loss": 0.4581,
"step": 1817
},
{
"epoch": 1.81,
"learning_rate": 4.7509376907266533e-07,
"loss": 0.4332,
"step": 1818
},
{
"epoch": 1.81,
"learning_rate": 4.7019681562769816e-07,
"loss": 0.407,
"step": 1819
},
{
"epoch": 1.81,
"learning_rate": 4.6532462271090763e-07,
"loss": 0.4488,
"step": 1820
},
{
"epoch": 1.81,
"learning_rate": 4.6047720298132205e-07,
"loss": 0.4136,
"step": 1821
},
{
"epoch": 1.81,
"learning_rate": 4.556545690336045e-07,
"loss": 0.3715,
"step": 1822
},
{
"epoch": 1.81,
"learning_rate": 4.5085673339802407e-07,
"loss": 0.398,
"step": 1823
},
{
"epoch": 1.81,
"learning_rate": 4.460837085404113e-07,
"loss": 0.3887,
"step": 1824
},
{
"epoch": 1.82,
"learning_rate": 4.413355068621394e-07,
"loss": 0.4453,
"step": 1825
},
{
"epoch": 1.82,
"learning_rate": 4.3661214070008006e-07,
"loss": 0.4426,
"step": 1826
},
{
"epoch": 1.82,
"learning_rate": 4.319136223265796e-07,
"loss": 0.4101,
"step": 1827
},
{
"epoch": 1.82,
"learning_rate": 4.272399639494251e-07,
"loss": 0.4348,
"step": 1828
},
{
"epoch": 1.82,
"learning_rate": 4.225911777118097e-07,
"loss": 0.4306,
"step": 1829
},
{
"epoch": 1.82,
"learning_rate": 4.179672756923037e-07,
"loss": 0.4273,
"step": 1830
},
{
"epoch": 1.82,
"learning_rate": 4.133682699048247e-07,
"loss": 0.4176,
"step": 1831
},
{
"epoch": 1.82,
"learning_rate": 4.087941722986022e-07,
"loss": 0.4283,
"step": 1832
},
{
"epoch": 1.82,
"learning_rate": 4.0424499475814836e-07,
"loss": 0.4328,
"step": 1833
},
{
"epoch": 1.82,
"learning_rate": 3.9972074910323066e-07,
"loss": 0.4124,
"step": 1834
},
{
"epoch": 1.82,
"learning_rate": 3.9522144708883493e-07,
"loss": 0.4686,
"step": 1835
},
{
"epoch": 1.83,
"learning_rate": 3.9074710040513887e-07,
"loss": 0.4194,
"step": 1836
},
{
"epoch": 1.83,
"learning_rate": 3.862977206774798e-07,
"loss": 0.434,
"step": 1837
},
{
"epoch": 1.83,
"learning_rate": 3.8187331946633154e-07,
"loss": 0.3925,
"step": 1838
},
{
"epoch": 1.83,
"learning_rate": 3.7747390826725736e-07,
"loss": 0.3995,
"step": 1839
},
{
"epoch": 1.83,
"learning_rate": 3.730994985108993e-07,
"loss": 0.4118,
"step": 1840
},
{
"epoch": 1.83,
"learning_rate": 3.687501015629369e-07,
"loss": 0.4033,
"step": 1841
},
{
"epoch": 1.83,
"learning_rate": 3.6442572872406155e-07,
"loss": 0.4642,
"step": 1842
},
{
"epoch": 1.83,
"learning_rate": 3.601263912299491e-07,
"loss": 0.4304,
"step": 1843
},
{
"epoch": 1.83,
"learning_rate": 3.5585210025122166e-07,
"loss": 0.4238,
"step": 1844
},
{
"epoch": 1.83,
"learning_rate": 3.5160286689343126e-07,
"loss": 0.3883,
"step": 1845
},
{
"epoch": 1.84,
"learning_rate": 3.4737870219702207e-07,
"loss": 0.4413,
"step": 1846
},
{
"epoch": 1.84,
"learning_rate": 3.431796171373025e-07,
"loss": 0.4022,
"step": 1847
},
{
"epoch": 1.84,
"learning_rate": 3.390056226244243e-07,
"loss": 0.4196,
"step": 1848
},
{
"epoch": 1.84,
"learning_rate": 3.3485672950334447e-07,
"loss": 0.4275,
"step": 1849
},
{
"epoch": 1.84,
"learning_rate": 3.3073294855379803e-07,
"loss": 0.4182,
"step": 1850
},
{
"epoch": 1.84,
"learning_rate": 3.266342904902764e-07,
"loss": 0.4444,
"step": 1851
},
{
"epoch": 1.84,
"learning_rate": 3.22560765962e-07,
"loss": 0.4487,
"step": 1852
},
{
"epoch": 1.84,
"learning_rate": 3.1851238555288046e-07,
"loss": 0.4218,
"step": 1853
},
{
"epoch": 1.84,
"learning_rate": 3.1448915978150365e-07,
"loss": 0.4217,
"step": 1854
},
{
"epoch": 1.84,
"learning_rate": 3.1049109910109453e-07,
"loss": 0.3923,
"step": 1855
},
{
"epoch": 1.85,
"learning_rate": 3.06518213899496e-07,
"loss": 0.4503,
"step": 1856
},
{
"epoch": 1.85,
"learning_rate": 3.025705144991398e-07,
"loss": 0.4388,
"step": 1857
},
{
"epoch": 1.85,
"learning_rate": 2.986480111570178e-07,
"loss": 0.3703,
"step": 1858
},
{
"epoch": 1.85,
"learning_rate": 2.947507140646588e-07,
"loss": 0.4014,
"step": 1859
},
{
"epoch": 1.85,
"learning_rate": 2.908786333480995e-07,
"loss": 0.4158,
"step": 1860
},
{
"epoch": 1.85,
"learning_rate": 2.8703177906785675e-07,
"loss": 0.4001,
"step": 1861
},
{
"epoch": 1.85,
"learning_rate": 2.832101612189064e-07,
"loss": 0.4517,
"step": 1862
},
{
"epoch": 1.85,
"learning_rate": 2.794137897306548e-07,
"loss": 0.4167,
"step": 1863
},
{
"epoch": 1.85,
"learning_rate": 2.756426744669105e-07,
"loss": 0.4421,
"step": 1864
},
{
"epoch": 1.85,
"learning_rate": 2.7189682522586135e-07,
"loss": 0.4287,
"step": 1865
},
{
"epoch": 1.86,
"learning_rate": 2.681762517400499e-07,
"loss": 0.4487,
"step": 1866
},
{
"epoch": 1.86,
"learning_rate": 2.644809636763446e-07,
"loss": 0.4113,
"step": 1867
},
{
"epoch": 1.86,
"learning_rate": 2.6081097063591855e-07,
"loss": 0.4303,
"step": 1868
},
{
"epoch": 1.86,
"learning_rate": 2.57166282154222e-07,
"loss": 0.4129,
"step": 1869
},
{
"epoch": 1.86,
"learning_rate": 2.5354690770096004e-07,
"loss": 0.4333,
"step": 1870
},
{
"epoch": 1.86,
"learning_rate": 2.499528566800613e-07,
"loss": 0.4321,
"step": 1871
},
{
"epoch": 1.86,
"learning_rate": 2.4638413842966725e-07,
"loss": 0.4611,
"step": 1872
},
{
"epoch": 1.86,
"learning_rate": 2.4284076222208964e-07,
"loss": 0.4593,
"step": 1873
},
{
"epoch": 1.86,
"learning_rate": 2.393227372638018e-07,
"loss": 0.4183,
"step": 1874
},
{
"epoch": 1.86,
"learning_rate": 2.3583007269540882e-07,
"loss": 0.4801,
"step": 1875
},
{
"epoch": 1.87,
"learning_rate": 2.323627775916204e-07,
"loss": 0.4098,
"step": 1876
},
{
"epoch": 1.87,
"learning_rate": 2.28920860961237e-07,
"loss": 0.4224,
"step": 1877
},
{
"epoch": 1.87,
"learning_rate": 2.2550433174711283e-07,
"loss": 0.3989,
"step": 1878
},
{
"epoch": 1.87,
"learning_rate": 2.221131988261438e-07,
"loss": 0.4619,
"step": 1879
},
{
"epoch": 1.87,
"learning_rate": 2.1874747100924188e-07,
"loss": 0.4716,
"step": 1880
},
{
"epoch": 1.87,
"learning_rate": 2.1540715704130745e-07,
"loss": 0.4662,
"step": 1881
},
{
"epoch": 1.87,
"learning_rate": 2.1209226560121477e-07,
"loss": 0.4681,
"step": 1882
},
{
"epoch": 1.87,
"learning_rate": 2.0880280530178098e-07,
"loss": 0.3667,
"step": 1883
},
{
"epoch": 1.87,
"learning_rate": 2.055387846897472e-07,
"loss": 0.4084,
"step": 1884
},
{
"epoch": 1.87,
"learning_rate": 2.0230021224575846e-07,
"loss": 0.4257,
"step": 1885
},
{
"epoch": 1.88,
"learning_rate": 1.9908709638434053e-07,
"loss": 0.4271,
"step": 1886
},
{
"epoch": 1.88,
"learning_rate": 1.9589944545387652e-07,
"loss": 0.4333,
"step": 1887
},
{
"epoch": 1.88,
"learning_rate": 1.9273726773658464e-07,
"loss": 0.4739,
"step": 1888
},
{
"epoch": 1.88,
"learning_rate": 1.8960057144850163e-07,
"loss": 0.4329,
"step": 1889
},
{
"epoch": 1.88,
"learning_rate": 1.8648936473945057e-07,
"loss": 0.4165,
"step": 1890
},
{
"epoch": 1.88,
"learning_rate": 1.8340365569303742e-07,
"loss": 0.4458,
"step": 1891
},
{
"epoch": 1.88,
"learning_rate": 1.803434523266101e-07,
"loss": 0.4275,
"step": 1892
},
{
"epoch": 1.88,
"learning_rate": 1.7730876259125396e-07,
"loss": 0.4342,
"step": 1893
},
{
"epoch": 1.88,
"learning_rate": 1.742995943717607e-07,
"loss": 0.4406,
"step": 1894
},
{
"epoch": 1.88,
"learning_rate": 1.7131595548661063e-07,
"loss": 0.3921,
"step": 1895
},
{
"epoch": 1.89,
"learning_rate": 1.6835785368795598e-07,
"loss": 0.4436,
"step": 1896
},
{
"epoch": 1.89,
"learning_rate": 1.654252966615977e-07,
"loss": 0.3591,
"step": 1897
},
{
"epoch": 1.89,
"learning_rate": 1.6251829202696524e-07,
"loss": 0.4667,
"step": 1898
},
{
"epoch": 1.89,
"learning_rate": 1.5963684733709462e-07,
"loss": 0.4801,
"step": 1899
},
{
"epoch": 1.89,
"learning_rate": 1.5678097007861715e-07,
"loss": 0.479,
"step": 1900
},
{
"epoch": 1.89,
"learning_rate": 1.539506676717284e-07,
"loss": 0.4634,
"step": 1901
},
{
"epoch": 1.89,
"learning_rate": 1.511459474701793e-07,
"loss": 0.4211,
"step": 1902
},
{
"epoch": 1.89,
"learning_rate": 1.4836681676124954e-07,
"loss": 0.4284,
"step": 1903
},
{
"epoch": 1.89,
"learning_rate": 1.4561328276573415e-07,
"loss": 0.4645,
"step": 1904
},
{
"epoch": 1.89,
"learning_rate": 1.4288535263792148e-07,
"loss": 0.3599,
"step": 1905
},
{
"epoch": 1.9,
"learning_rate": 1.4018303346557295e-07,
"loss": 0.4592,
"step": 1906
},
{
"epoch": 1.9,
"learning_rate": 1.3750633226990996e-07,
"loss": 0.4193,
"step": 1907
},
{
"epoch": 1.9,
"learning_rate": 1.3485525600559378e-07,
"loss": 0.4526,
"step": 1908
},
{
"epoch": 1.9,
"learning_rate": 1.3222981156070126e-07,
"loss": 0.4943,
"step": 1909
},
{
"epoch": 1.9,
"learning_rate": 1.2963000575671792e-07,
"loss": 0.4589,
"step": 1910
},
{
"epoch": 1.9,
"learning_rate": 1.2705584534851268e-07,
"loss": 0.4149,
"step": 1911
},
{
"epoch": 1.9,
"learning_rate": 1.2450733702431884e-07,
"loss": 0.4419,
"step": 1912
},
{
"epoch": 1.9,
"learning_rate": 1.219844874057241e-07,
"loss": 0.4058,
"step": 1913
},
{
"epoch": 1.9,
"learning_rate": 1.1948730304764622e-07,
"loss": 0.3808,
"step": 1914
},
{
"epoch": 1.9,
"learning_rate": 1.1701579043832179e-07,
"loss": 0.3885,
"step": 1915
},
{
"epoch": 1.91,
"learning_rate": 1.1456995599928522e-07,
"loss": 0.4662,
"step": 1916
},
{
"epoch": 1.91,
"learning_rate": 1.1214980608535209e-07,
"loss": 0.4368,
"step": 1917
},
{
"epoch": 1.91,
"learning_rate": 1.0975534698460577e-07,
"loss": 0.3539,
"step": 1918
},
{
"epoch": 1.91,
"learning_rate": 1.073865849183786e-07,
"loss": 0.4123,
"step": 1919
},
{
"epoch": 1.91,
"learning_rate": 1.0504352604123413e-07,
"loss": 0.4394,
"step": 1920
},
{
"epoch": 1.91,
"learning_rate": 1.0272617644095928e-07,
"loss": 0.4522,
"step": 1921
},
{
"epoch": 1.91,
"learning_rate": 1.0043454213853665e-07,
"loss": 0.4574,
"step": 1922
},
{
"epoch": 1.91,
"learning_rate": 9.816862908813784e-08,
"loss": 0.4396,
"step": 1923
},
{
"epoch": 1.91,
"learning_rate": 9.592844317710238e-08,
"loss": 0.4324,
"step": 1924
},
{
"epoch": 1.91,
"learning_rate": 9.371399022592765e-08,
"loss": 0.445,
"step": 1925
},
{
"epoch": 1.92,
"learning_rate": 9.152527598825123e-08,
"loss": 0.4121,
"step": 1926
},
{
"epoch": 1.92,
"learning_rate": 8.93623061508353e-08,
"loss": 0.4433,
"step": 1927
},
{
"epoch": 1.92,
"learning_rate": 8.722508633355109e-08,
"loss": 0.4401,
"step": 1928
},
{
"epoch": 1.92,
"learning_rate": 8.511362208936447e-08,
"loss": 0.4224,
"step": 1929
},
{
"epoch": 1.92,
"learning_rate": 8.302791890432815e-08,
"loss": 0.4306,
"step": 1930
},
{
"epoch": 1.92,
"learning_rate": 8.096798219755731e-08,
"loss": 0.4107,
"step": 1931
},
{
"epoch": 1.92,
"learning_rate": 7.893381732122063e-08,
"loss": 0.4567,
"step": 1932
},
{
"epoch": 1.92,
"learning_rate": 7.692542956052706e-08,
"loss": 0.426,
"step": 1933
},
{
"epoch": 1.92,
"learning_rate": 7.494282413371135e-08,
"loss": 0.4088,
"step": 1934
},
{
"epoch": 1.92,
"learning_rate": 7.298600619201735e-08,
"loss": 0.4629,
"step": 1935
},
{
"epoch": 1.93,
"learning_rate": 7.105498081969142e-08,
"loss": 0.4153,
"step": 1936
},
{
"epoch": 1.93,
"learning_rate": 6.914975303396021e-08,
"loss": 0.3911,
"step": 1937
},
{
"epoch": 1.93,
"learning_rate": 6.727032778502729e-08,
"loss": 0.4286,
"step": 1938
},
{
"epoch": 1.93,
"learning_rate": 6.541670995605321e-08,
"loss": 0.4342,
"step": 1939
},
{
"epoch": 1.93,
"learning_rate": 6.358890436314547e-08,
"loss": 0.4025,
"step": 1940
},
{
"epoch": 1.93,
"learning_rate": 6.178691575534412e-08,
"loss": 0.4329,
"step": 1941
},
{
"epoch": 1.93,
"learning_rate": 6.001074881461511e-08,
"loss": 0.4373,
"step": 1942
},
{
"epoch": 1.93,
"learning_rate": 5.826040815582912e-08,
"loss": 0.4208,
"step": 1943
},
{
"epoch": 1.93,
"learning_rate": 5.653589832675943e-08,
"loss": 0.3861,
"step": 1944
},
{
"epoch": 1.93,
"learning_rate": 5.483722380805967e-08,
"loss": 0.4193,
"step": 1945
},
{
"epoch": 1.94,
"learning_rate": 5.31643890132616e-08,
"loss": 0.4287,
"step": 1946
},
{
"epoch": 1.94,
"learning_rate": 5.151739828875846e-08,
"loss": 0.4071,
"step": 1947
},
{
"epoch": 1.94,
"learning_rate": 4.98962559137961e-08,
"loss": 0.4064,
"step": 1948
},
{
"epoch": 1.94,
"learning_rate": 4.830096610045854e-08,
"loss": 0.4186,
"step": 1949
},
{
"epoch": 1.94,
"learning_rate": 4.673153299365907e-08,
"loss": 0.4798,
"step": 1950
},
{
"epoch": 1.94,
"learning_rate": 4.5187960671133626e-08,
"loss": 0.3861,
"step": 1951
},
{
"epoch": 1.94,
"learning_rate": 4.367025314342077e-08,
"loss": 0.4104,
"step": 1952
},
{
"epoch": 1.94,
"learning_rate": 4.21784143538595e-08,
"loss": 0.4535,
"step": 1953
},
{
"epoch": 1.94,
"learning_rate": 4.071244817857589e-08,
"loss": 0.441,
"step": 1954
},
{
"epoch": 1.94,
"learning_rate": 3.9272358426473146e-08,
"loss": 0.436,
"step": 1955
},
{
"epoch": 1.95,
"learning_rate": 3.7858148839221565e-08,
"loss": 0.4062,
"step": 1956
},
{
"epoch": 1.95,
"learning_rate": 3.646982309124969e-08,
"loss": 0.4799,
"step": 1957
},
{
"epoch": 1.95,
"learning_rate": 3.510738478973208e-08,
"loss": 0.4585,
"step": 1958
},
{
"epoch": 1.95,
"learning_rate": 3.3770837474584874e-08,
"loss": 0.4864,
"step": 1959
},
{
"epoch": 1.95,
"learning_rate": 3.2460184618452464e-08,
"loss": 0.4516,
"step": 1960
},
{
"epoch": 1.95,
"learning_rate": 3.117542962669973e-08,
"loss": 0.4382,
"step": 1961
},
{
"epoch": 1.95,
"learning_rate": 2.991657583740315e-08,
"loss": 0.4013,
"step": 1962
},
{
"epoch": 1.95,
"learning_rate": 2.8683626521341934e-08,
"loss": 0.4508,
"step": 1963
},
{
"epoch": 1.95,
"learning_rate": 2.747658488199023e-08,
"loss": 0.461,
"step": 1964
},
{
"epoch": 1.95,
"learning_rate": 2.6295454055508263e-08,
"loss": 0.4273,
"step": 1965
},
{
"epoch": 1.96,
"learning_rate": 2.5140237110733433e-08,
"loss": 0.4016,
"step": 1966
},
{
"epoch": 1.96,
"learning_rate": 2.4010937049174788e-08,
"loss": 0.3793,
"step": 1967
},
{
"epoch": 1.96,
"learning_rate": 2.2907556805001895e-08,
"loss": 0.4418,
"step": 1968
},
{
"epoch": 1.96,
"learning_rate": 2.1830099245040427e-08,
"loss": 0.3802,
"step": 1969
},
{
"epoch": 1.96,
"learning_rate": 2.0778567168761032e-08,
"loss": 0.4189,
"step": 1970
},
{
"epoch": 1.96,
"learning_rate": 1.975296330827825e-08,
"loss": 0.4188,
"step": 1971
},
{
"epoch": 1.96,
"learning_rate": 1.875329032833495e-08,
"loss": 0.4489,
"step": 1972
},
{
"epoch": 1.96,
"learning_rate": 1.777955082630234e-08,
"loss": 0.4489,
"step": 1973
},
{
"epoch": 1.96,
"learning_rate": 1.683174733216997e-08,
"loss": 0.4434,
"step": 1974
},
{
"epoch": 1.96,
"learning_rate": 1.5909882308540182e-08,
"loss": 0.4098,
"step": 1975
},
{
"epoch": 1.97,
"learning_rate": 1.5013958150621455e-08,
"loss": 0.3922,
"step": 1976
},
{
"epoch": 1.97,
"learning_rate": 1.4143977186221735e-08,
"loss": 0.4721,
"step": 1977
},
{
"epoch": 1.97,
"learning_rate": 1.3299941675743999e-08,
"loss": 0.4666,
"step": 1978
},
{
"epoch": 1.97,
"learning_rate": 1.248185381217848e-08,
"loss": 0.4483,
"step": 1979
},
{
"epoch": 1.97,
"learning_rate": 1.1689715721097117e-08,
"loss": 0.4192,
"step": 1980
},
{
"epoch": 1.97,
"learning_rate": 1.0923529460649119e-08,
"loss": 0.4573,
"step": 1981
},
{
"epoch": 1.97,
"learning_rate": 1.0183297021555405e-08,
"loss": 0.4715,
"step": 1982
},
{
"epoch": 1.97,
"learning_rate": 9.469020327103063e-09,
"loss": 0.4233,
"step": 1983
},
{
"epoch": 1.97,
"learning_rate": 8.780701233139789e-09,
"loss": 0.4787,
"step": 1984
},
{
"epoch": 1.97,
"learning_rate": 8.118341528071671e-09,
"loss": 0.4704,
"step": 1985
},
{
"epoch": 1.98,
"learning_rate": 7.481942932853204e-09,
"loss": 0.4177,
"step": 1986
},
{
"epoch": 1.98,
"learning_rate": 6.8715071009894944e-09,
"loss": 0.4958,
"step": 1987
},
{
"epoch": 1.98,
"learning_rate": 6.2870356185273924e-09,
"loss": 0.4805,
"step": 1988
},
{
"epoch": 1.98,
"learning_rate": 5.728530004051047e-09,
"loss": 0.4165,
"step": 1989
},
{
"epoch": 1.98,
"learning_rate": 5.195991708681902e-09,
"loss": 0.4895,
"step": 1990
},
{
"epoch": 1.98,
"learning_rate": 4.68942211607315e-09,
"loss": 0.4932,
"step": 1991
},
{
"epoch": 1.98,
"learning_rate": 4.208822542401958e-09,
"loss": 0.5008,
"step": 1992
},
{
"epoch": 1.98,
"learning_rate": 3.754194236373909e-09,
"loss": 0.4242,
"step": 1993
},
{
"epoch": 1.98,
"learning_rate": 3.325538379211901e-09,
"loss": 0.4235,
"step": 1994
},
{
"epoch": 1.98,
"learning_rate": 2.9228560846583655e-09,
"loss": 0.4523,
"step": 1995
},
{
"epoch": 1.99,
"learning_rate": 2.546148398971937e-09,
"loss": 0.4823,
"step": 1996
},
{
"epoch": 1.99,
"learning_rate": 2.1954163009219043e-09,
"loss": 0.3952,
"step": 1997
},
{
"epoch": 1.99,
"learning_rate": 1.870660701785987e-09,
"loss": 0.5206,
"step": 1998
},
{
"epoch": 1.99,
"learning_rate": 1.5718824453525572e-09,
"loss": 0.4393,
"step": 1999
},
{
"epoch": 1.99,
"learning_rate": 1.299082307912869e-09,
"loss": 0.3969,
"step": 2000
}
],
"logging_steps": 1.0,
"max_steps": 2010,
"num_train_epochs": 2,
"save_steps": 100,
"total_flos": 1440382941904896.0,
"trial_name": null,
"trial_params": null
}