{
  "best_metric": 0.026551904156804085,
  "best_model_checkpoint": "./phishing-email-detection/checkpoint-825",
  "epoch": 3.0,
  "eval_steps": 1,
  "global_step": 825,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0036363636363636364, "grad_norm": 0.5588774085044861, "learning_rate": 1.0000000000000002e-06, "loss": 0.6866, "step": 1 },
    { "epoch": 0.007272727272727273, "grad_norm": 1.193735957145691, "learning_rate": 2.0000000000000003e-06, "loss": 0.7133, "step": 2 },
    { "epoch": 0.01090909090909091, "grad_norm": 0.7555140256881714, "learning_rate": 3e-06, "loss": 0.6972, "step": 3 },
    { "epoch": 0.014545454545454545, "grad_norm": 0.7989323735237122, "learning_rate": 4.000000000000001e-06, "loss": 0.6927, "step": 4 },
    { "epoch": 0.01818181818181818, "grad_norm": 0.608884334564209, "learning_rate": 5e-06, "loss": 0.6873, "step": 5 },
    { "epoch": 0.02181818181818182, "grad_norm": 0.5146162509918213, "learning_rate": 6e-06, "loss": 0.6783, "step": 6 },
    { "epoch": 0.025454545454545455, "grad_norm": 1.2107092142105103, "learning_rate": 7.000000000000001e-06, "loss": 0.6978, "step": 7 },
    { "epoch": 0.02909090909090909, "grad_norm": 0.7423388957977295, "learning_rate": 8.000000000000001e-06, "loss": 0.6951, "step": 8 },
    { "epoch": 0.03272727272727273, "grad_norm": 1.1845803260803223, "learning_rate": 9e-06, "loss": 0.6952, "step": 9 },
    { "epoch": 0.03636363636363636, "grad_norm": 0.6241262555122375, "learning_rate": 1e-05, "loss": 0.6854, "step": 10 },
    { "epoch": 0.04, "grad_norm": 1.0969253778457642, "learning_rate": 1.1000000000000001e-05, "loss": 0.6649, "step": 11 },
    { "epoch": 0.04363636363636364, "grad_norm": 0.7080792188644409, "learning_rate": 1.2e-05, "loss": 0.6827, "step": 12 },
    { "epoch": 0.04727272727272727, "grad_norm": 0.6348336935043335, "learning_rate": 1.3000000000000001e-05, "loss": 0.6681, "step": 13 },
    { "epoch": 0.05090909090909091, "grad_norm": 0.714078426361084, "learning_rate": 1.4000000000000001e-05, "loss": 0.6719, "step": 14 },
    { "epoch": 0.05454545454545454, "grad_norm": 0.6718598008155823, "learning_rate": 1.5e-05, "loss": 0.6702, "step": 15 },
    { "epoch": 0.05818181818181818, "grad_norm": 1.0352996587753296, "learning_rate": 1.6000000000000003e-05, "loss": 0.6689, "step": 16 },
    { "epoch": 0.06181818181818182, "grad_norm": 0.8572512269020081, "learning_rate": 1.7000000000000003e-05, "loss": 0.6459, "step": 17 },
    { "epoch": 0.06545454545454546, "grad_norm": 1.2123569250106812, "learning_rate": 1.8e-05, "loss": 0.6487, "step": 18 },
    { "epoch": 0.06909090909090909, "grad_norm": 2.0234522819519043, "learning_rate": 1.9e-05, "loss": 0.6572, "step": 19 },
    { "epoch": 0.07272727272727272, "grad_norm": 2.4744138717651367, "learning_rate": 2e-05, "loss": 0.586, "step": 20 },
    { "epoch": 0.07636363636363637, "grad_norm": 1.7895574569702148, "learning_rate": 2.1e-05, "loss": 0.6114, "step": 21 },
    { "epoch": 0.08, "grad_norm": 2.3886725902557373, "learning_rate": 2.2000000000000003e-05, "loss": 0.6199, "step": 22 },
    { "epoch": 0.08363636363636363, "grad_norm": 2.5716464519500732, "learning_rate": 2.3000000000000003e-05, "loss": 0.5951, "step": 23 },
    { "epoch": 0.08727272727272728, "grad_norm": 1.6023527383804321, "learning_rate": 2.4e-05, "loss": 0.5584, "step": 24 },
    { "epoch": 0.09090909090909091, "grad_norm": 1.8739789724349976, "learning_rate": 2.5e-05, "loss": 0.5711, "step": 25 },
    { "epoch": 0.09454545454545454, "grad_norm": 1.912356972694397, "learning_rate": 2.6000000000000002e-05, "loss": 0.5251, "step": 26 },
    { "epoch": 0.09818181818181818, "grad_norm": 2.616036891937256, "learning_rate": 2.7000000000000002e-05, "loss": 0.4568, "step": 27 },
    { "epoch": 0.10181818181818182, "grad_norm": 2.2210693359375, "learning_rate": 2.8000000000000003e-05, "loss": 0.4547, "step": 28 },
    { "epoch": 0.10545454545454545, "grad_norm": 4.0840325355529785, "learning_rate": 2.9e-05, "loss": 0.409, "step": 29 },
    { "epoch": 0.10909090909090909, "grad_norm": 3.1913392543792725, "learning_rate": 3e-05, "loss": 0.3759, "step": 30 },
    { "epoch": 0.11272727272727273, "grad_norm": 2.67592191696167, "learning_rate": 3.1e-05, "loss": 0.3727, "step": 31 },
    { "epoch": 0.11636363636363636, "grad_norm": 3.8523147106170654, "learning_rate": 3.2000000000000005e-05, "loss": 0.3395, "step": 32 },
    { "epoch": 0.12, "grad_norm": 3.1377878189086914, "learning_rate": 3.3e-05, "loss": 0.2602, "step": 33 },
    { "epoch": 0.12363636363636364, "grad_norm": 4.252383232116699, "learning_rate": 3.4000000000000007e-05, "loss": 0.2641, "step": 34 },
    { "epoch": 0.12727272727272726, "grad_norm": 2.4112355709075928, "learning_rate": 3.5e-05, "loss": 0.2815, "step": 35 },
    { "epoch": 0.13090909090909092, "grad_norm": 2.1956121921539307, "learning_rate": 3.6e-05, "loss": 0.212, "step": 36 },
    { "epoch": 0.13454545454545455, "grad_norm": 1.6787267923355103, "learning_rate": 3.7e-05, "loss": 0.2461, "step": 37 },
    { "epoch": 0.13818181818181818, "grad_norm": 5.1338300704956055, "learning_rate": 3.8e-05, "loss": 0.2723, "step": 38 },
    { "epoch": 0.14181818181818182, "grad_norm": 1.7655150890350342, "learning_rate": 3.9000000000000006e-05, "loss": 0.2078, "step": 39 },
    { "epoch": 0.14545454545454545, "grad_norm": 2.906001329421997, "learning_rate": 4e-05, "loss": 0.3349, "step": 40 },
    { "epoch": 0.14909090909090908, "grad_norm": 4.4803900718688965, "learning_rate": 4.1e-05, "loss": 0.1256, "step": 41 },
    { "epoch": 0.15272727272727274, "grad_norm": 2.0880069732666016, "learning_rate": 4.2e-05, "loss": 0.1547, "step": 42 },
    { "epoch": 0.15636363636363637, "grad_norm": 3.3797402381896973, "learning_rate": 4.3e-05, "loss": 0.137, "step": 43 },
    { "epoch": 0.16, "grad_norm": 3.1822092533111572, "learning_rate": 4.4000000000000006e-05, "loss": 0.1881, "step": 44 },
    { "epoch": 0.16363636363636364, "grad_norm": 1.7508845329284668, "learning_rate": 4.5e-05, "loss": 0.0657, "step": 45 },
    { "epoch": 0.16727272727272727, "grad_norm": 1.9360814094543457, "learning_rate": 4.600000000000001e-05, "loss": 0.0578, "step": 46 },
    { "epoch": 0.1709090909090909, "grad_norm": 5.421896934509277, "learning_rate": 4.7e-05, "loss": 0.2809, "step": 47 },
    { "epoch": 0.17454545454545456, "grad_norm": 3.034318685531616, "learning_rate": 4.8e-05, "loss": 0.1716, "step": 48 },
    { "epoch": 0.1781818181818182, "grad_norm": 8.016839027404785, "learning_rate": 4.9e-05, "loss": 0.261, "step": 49 },
    { "epoch": 0.18181818181818182, "grad_norm": 2.5083861351013184, "learning_rate": 5e-05, "loss": 0.0848, "step": 50 },
    { "epoch": 0.18545454545454546, "grad_norm": 7.909552097320557, "learning_rate": 4.9935483870967744e-05, "loss": 0.0823, "step": 51 },
    { "epoch": 0.1890909090909091, "grad_norm": 8.652934074401855, "learning_rate": 4.9870967741935485e-05, "loss": 0.1812, "step": 52 },
    { "epoch": 0.19272727272727272, "grad_norm": 2.450180768966675, "learning_rate": 4.9806451612903226e-05, "loss": 0.065, "step": 53 },
    { "epoch": 0.19636363636363635, "grad_norm": 3.1136999130249023, "learning_rate": 4.9741935483870974e-05, "loss": 0.1683, "step": 54 },
    { "epoch": 0.2, "grad_norm": 6.53245735168457, "learning_rate": 4.967741935483871e-05, "loss": 0.1243, "step": 55 },
    { "epoch": 0.20363636363636364, "grad_norm": 3.6356089115142822, "learning_rate": 4.961290322580646e-05, "loss": 0.1899, "step": 56 },
    { "epoch": 0.20727272727272728, "grad_norm": 13.28806209564209, "learning_rate": 4.95483870967742e-05, "loss": 0.416, "step": 57 },
    { "epoch": 0.2109090909090909, "grad_norm": 6.773140907287598, "learning_rate": 4.948387096774193e-05, "loss": 0.1387, "step": 58 },
    { "epoch": 0.21454545454545454, "grad_norm": 4.9038848876953125, "learning_rate": 4.941935483870968e-05, "loss": 0.1795, "step": 59 },
    { "epoch": 0.21818181818181817, "grad_norm": 1.2570604085922241, "learning_rate": 4.935483870967742e-05, "loss": 0.0462, "step": 60 },
    { "epoch": 0.22181818181818183, "grad_norm": 4.595444202423096, "learning_rate": 4.929032258064516e-05, "loss": 0.2883, "step": 61 },
    { "epoch": 0.22545454545454546, "grad_norm": 5.1306891441345215, "learning_rate": 4.9225806451612904e-05, "loss": 0.1155, "step": 62 },
    { "epoch": 0.2290909090909091, "grad_norm": 3.7912144660949707, "learning_rate": 4.916129032258065e-05, "loss": 0.254, "step": 63 },
    { "epoch": 0.23272727272727273, "grad_norm": 5.713457107543945, "learning_rate": 4.9096774193548387e-05, "loss": 0.1327, "step": 64 },
    { "epoch": 0.23636363636363636, "grad_norm": 4.170778274536133, "learning_rate": 4.903225806451613e-05, "loss": 0.1039, "step": 65 },
    { "epoch": 0.24, "grad_norm": 1.75148344039917, "learning_rate": 4.8967741935483876e-05, "loss": 0.0852, "step": 66 },
    { "epoch": 0.24363636363636362, "grad_norm": 9.794595718383789, "learning_rate": 4.890322580645161e-05, "loss": 0.2108, "step": 67 },
    { "epoch": 0.24727272727272728, "grad_norm": 9.14588737487793, "learning_rate": 4.883870967741936e-05, "loss": 0.1652, "step": 68 },
    { "epoch": 0.2509090909090909, "grad_norm": 0.7871703505516052, "learning_rate": 4.87741935483871e-05, "loss": 0.0286, "step": 69 },
    { "epoch": 0.2545454545454545, "grad_norm": 3.963533401489258, "learning_rate": 4.870967741935484e-05, "loss": 0.1447, "step": 70 },
    { "epoch": 0.2581818181818182, "grad_norm": 1.9403924942016602, "learning_rate": 4.864516129032258e-05, "loss": 0.0346, "step": 71 },
    { "epoch": 0.26181818181818184, "grad_norm": 4.882287979125977, "learning_rate": 4.858064516129032e-05, "loss": 0.0858, "step": 72 },
    { "epoch": 0.26545454545454544, "grad_norm": 3.18686842918396, "learning_rate": 4.8516129032258065e-05, "loss": 0.1201, "step": 73 },
    { "epoch": 0.2690909090909091, "grad_norm": 1.2996779680252075, "learning_rate": 4.8451612903225806e-05, "loss": 0.0285, "step": 74 },
    { "epoch": 0.2727272727272727, "grad_norm": 4.498321056365967, "learning_rate": 4.8387096774193554e-05, "loss": 0.1272, "step": 75 },
    { "epoch": 0.27636363636363637, "grad_norm": 7.456954479217529, "learning_rate": 4.8322580645161295e-05, "loss": 0.1553, "step": 76 },
    { "epoch": 0.28, "grad_norm": 1.462753415107727, "learning_rate": 4.8258064516129036e-05, "loss": 0.0688, "step": 77 },
    { "epoch": 0.28363636363636363, "grad_norm": 2.150094985961914, "learning_rate": 4.819354838709678e-05, "loss": 0.0558, "step": 78 },
    { "epoch": 0.2872727272727273, "grad_norm": 4.6224284172058105, "learning_rate": 4.812903225806452e-05, "loss": 0.1185, "step": 79 },
    { "epoch": 0.2909090909090909, "grad_norm": 3.0150296688079834, "learning_rate": 4.806451612903226e-05, "loss": 0.2397, "step": 80 },
    { "epoch": 0.29454545454545455, "grad_norm": 13.630721092224121, "learning_rate": 4.8e-05, "loss": 0.2093, "step": 81 },
    { "epoch": 0.29818181818181816, "grad_norm": 8.345911026000977, "learning_rate": 4.793548387096774e-05, "loss": 0.1799, "step": 82 },
    { "epoch": 0.3018181818181818, "grad_norm": 4.615355491638184, "learning_rate": 4.7870967741935484e-05, "loss": 0.0951, "step": 83 },
    { "epoch": 0.3054545454545455, "grad_norm": 6.051864147186279, "learning_rate": 4.780645161290323e-05, "loss": 0.2423, "step": 84 },
    { "epoch": 0.3090909090909091, "grad_norm": 3.912353277206421, "learning_rate": 4.774193548387097e-05, "loss": 0.0923, "step": 85 },
    { "epoch": 0.31272727272727274, "grad_norm": 5.80419397354126, "learning_rate": 4.767741935483871e-05, "loss": 0.1406, "step": 86 },
    { "epoch": 0.31636363636363635, "grad_norm": 4.3275227546691895, "learning_rate": 4.7612903225806455e-05, "loss": 0.2217, "step": 87 },
    { "epoch": 0.32, "grad_norm": 3.613757610321045, "learning_rate": 4.75483870967742e-05, "loss": 0.1888, "step": 88 },
    { "epoch": 0.3236363636363636, "grad_norm": 0.619616687297821, "learning_rate": 4.748387096774194e-05, "loss": 0.0193, "step": 89 },
    { "epoch": 0.32727272727272727, "grad_norm": 3.2283294200897217, "learning_rate": 4.741935483870968e-05, "loss": 0.1036, "step": 90 },
    { "epoch": 0.33090909090909093, "grad_norm": 2.8031208515167236, "learning_rate": 4.735483870967742e-05, "loss": 0.1384, "step": 91 },
    { "epoch": 0.33454545454545453, "grad_norm": 2.338831901550293, "learning_rate": 4.729032258064516e-05, "loss": 0.0763, "step": 92 },
    { "epoch": 0.3381818181818182, "grad_norm": 1.7797901630401611, "learning_rate": 4.72258064516129e-05, "loss": 0.1343, "step": 93 },
    { "epoch": 0.3418181818181818, "grad_norm": 2.9699721336364746, "learning_rate": 4.716129032258065e-05, "loss": 0.124, "step": 94 },
    { "epoch": 0.34545454545454546, "grad_norm": 1.207624077796936, "learning_rate": 4.7096774193548385e-05, "loss": 0.0384, "step": 95 },
    { "epoch": 0.3490909090909091, "grad_norm": 1.7600481510162354, "learning_rate": 4.7032258064516133e-05, "loss": 0.1058, "step": 96 },
    { "epoch": 0.3527272727272727, "grad_norm": 1.9546951055526733, "learning_rate": 4.6967741935483875e-05, "loss": 0.0867, "step": 97 },
    { "epoch": 0.3563636363636364, "grad_norm": 1.6174895763397217, "learning_rate": 4.6903225806451616e-05, "loss": 0.0249, "step": 98 },
    { "epoch": 0.36, "grad_norm": 2.494776725769043, "learning_rate": 4.683870967741936e-05, "loss": 0.0813, "step": 99 },
    { "epoch": 0.36363636363636365, "grad_norm": 1.799558162689209, "learning_rate": 4.67741935483871e-05, "loss": 0.0698, "step": 100 },
    { "epoch": 0.36727272727272725, "grad_norm": 1.8396685123443604, "learning_rate": 4.670967741935484e-05, "loss": 0.1093, "step": 101 },
    { "epoch": 0.3709090909090909, "grad_norm": 1.1487964391708374, "learning_rate": 4.664516129032258e-05, "loss": 0.0199, "step": 102 },
    { "epoch": 0.37454545454545457, "grad_norm": 3.693464756011963, "learning_rate": 4.658064516129033e-05, "loss": 0.0468, "step": 103 },
    { "epoch": 0.3781818181818182, "grad_norm": 0.4796558916568756, "learning_rate": 4.651612903225806e-05, "loss": 0.0142, "step": 104 },
    { "epoch": 0.38181818181818183, "grad_norm": 1.3589314222335815, "learning_rate": 4.645161290322581e-05, "loss": 0.0197, "step": 105 },
    { "epoch": 0.38545454545454544, "grad_norm": 5.373131275177002, "learning_rate": 4.638709677419355e-05, "loss": 0.1297, "step": 106 },
    { "epoch": 0.3890909090909091, "grad_norm": 5.07847261428833, "learning_rate": 4.632258064516129e-05, "loss": 0.1152, "step": 107 },
    { "epoch": 0.3927272727272727, "grad_norm": 2.5810861587524414, "learning_rate": 4.6258064516129035e-05, "loss": 0.0499, "step": 108 },
    { "epoch": 0.39636363636363636, "grad_norm": 3.111640691757202, "learning_rate": 4.6193548387096776e-05, "loss": 0.0488, "step": 109 },
    { "epoch": 0.4, "grad_norm": 2.8592653274536133, "learning_rate": 4.612903225806452e-05, "loss": 0.0648, "step": 110 },
    { "epoch": 0.4036363636363636, "grad_norm": 4.070368766784668, "learning_rate": 4.606451612903226e-05, "loss": 0.1844, "step": 111 },
    { "epoch": 0.4072727272727273, "grad_norm": 2.12369441986084, "learning_rate": 4.600000000000001e-05, "loss": 0.0645, "step": 112 },
    { "epoch": 0.4109090909090909, "grad_norm": 2.3667800426483154, "learning_rate": 4.593548387096774e-05, "loss": 0.0471, "step": 113 },
    { "epoch": 0.41454545454545455, "grad_norm": 4.2587056159973145, "learning_rate": 4.587096774193548e-05, "loss": 0.062, "step": 114 },
    { "epoch": 0.41818181818181815, "grad_norm": 4.729971885681152, "learning_rate": 4.580645161290323e-05, "loss": 0.1277, "step": 115 },
    { "epoch": 0.4218181818181818, "grad_norm": 2.8815808296203613, "learning_rate": 4.5741935483870965e-05, "loss": 0.1424, "step": 116 },
    { "epoch": 0.4254545454545455, "grad_norm": 3.5163843631744385, "learning_rate": 4.567741935483871e-05, "loss": 0.1753, "step": 117 },
    { "epoch": 0.4290909090909091, "grad_norm": 0.9345911145210266, "learning_rate": 4.5612903225806454e-05, "loss": 0.0173, "step": 118 },
    { "epoch": 0.43272727272727274, "grad_norm": 2.4038431644439697, "learning_rate": 4.5548387096774196e-05, "loss": 0.1339, "step": 119 },
    { "epoch": 0.43636363636363634, "grad_norm": 1.086334228515625, "learning_rate": 4.548387096774194e-05, "loss": 0.06, "step": 120 },
    { "epoch": 0.44, "grad_norm": 3.3562028408050537, "learning_rate": 4.5419354838709685e-05, "loss": 0.2079, "step": 121 },
    { "epoch": 0.44363636363636366, "grad_norm": 3.6790003776550293, "learning_rate": 4.535483870967742e-05, "loss": 0.1622, "step": 122 },
    { "epoch": 0.44727272727272727, "grad_norm": 4.95260763168335, "learning_rate": 4.529032258064516e-05, "loss": 0.069, "step": 123 },
    { "epoch": 0.4509090909090909, "grad_norm": 1.6130903959274292, "learning_rate": 4.522580645161291e-05, "loss": 0.1197, "step": 124 },
    { "epoch": 0.45454545454545453, "grad_norm": 3.9144365787506104, "learning_rate": 4.516129032258064e-05, "loss": 0.063, "step": 125 },
    { "epoch": 0.4581818181818182, "grad_norm": 7.754761219024658, "learning_rate": 4.509677419354839e-05, "loss": 0.358, "step": 126 },
    { "epoch": 0.4618181818181818, "grad_norm": 4.277048110961914, "learning_rate": 4.503225806451613e-05, "loss": 0.1137, "step": 127 },
    { "epoch": 0.46545454545454545, "grad_norm": 4.798757553100586, "learning_rate": 4.4967741935483873e-05, "loss": 0.1365, "step": 128 },
    { "epoch": 0.4690909090909091, "grad_norm": 2.407785177230835, "learning_rate": 4.4903225806451615e-05, "loss": 0.0339, "step": 129 },
    { "epoch": 0.4727272727272727, "grad_norm": 2.8577773571014404, "learning_rate": 4.4838709677419356e-05, "loss": 0.0634, "step": 130 },
    { "epoch": 0.4763636363636364, "grad_norm": 2.8833367824554443, "learning_rate": 4.47741935483871e-05, "loss": 0.1274, "step": 131 },
    { "epoch": 0.48, "grad_norm": 3.3655707836151123, "learning_rate": 4.470967741935484e-05, "loss": 0.0845, "step": 132 },
    { "epoch": 0.48363636363636364, "grad_norm": 0.4524034559726715, "learning_rate": 4.4645161290322586e-05, "loss": 0.0201, "step": 133 },
    { "epoch": 0.48727272727272725, "grad_norm": 2.0502264499664307, "learning_rate": 4.458064516129032e-05, "loss": 0.0241, "step": 134 },
    { "epoch": 0.4909090909090909, "grad_norm": 3.171466588973999, "learning_rate": 4.451612903225807e-05, "loss": 0.0988, "step": 135 },
    { "epoch": 0.49454545454545457, "grad_norm": 3.592583179473877, "learning_rate": 4.445161290322581e-05, "loss": 0.0352, "step": 136 },
    { "epoch": 0.49818181818181817, "grad_norm": 2.234022617340088, "learning_rate": 4.438709677419355e-05, "loss": 0.1045, "step": 137 },
    { "epoch": 0.5018181818181818, "grad_norm": 1.0845825672149658, "learning_rate": 4.432258064516129e-05, "loss": 0.1009, "step": 138 },
    { "epoch": 0.5054545454545455, "grad_norm": 2.9205362796783447, "learning_rate": 4.4258064516129034e-05, "loss": 0.0515, "step": 139 },
    { "epoch": 0.509090909090909, "grad_norm": 3.975783109664917, "learning_rate": 4.4193548387096775e-05, "loss": 0.1285, "step": 140 },
    { "epoch": 0.5127272727272727, "grad_norm": 5.859277248382568, "learning_rate": 4.4129032258064516e-05, "loss": 0.0997, "step": 141 },
    { "epoch": 0.5163636363636364, "grad_norm": 4.712912559509277, "learning_rate": 4.4064516129032264e-05, "loss": 0.1876, "step": 142 },
    { "epoch": 0.52, "grad_norm": 3.739158868789673, "learning_rate": 4.4000000000000006e-05, "loss": 0.0487, "step": 143 },
    { "epoch": 0.5236363636363637, "grad_norm": 0.8333390951156616, "learning_rate": 4.393548387096774e-05, "loss": 0.0169, "step": 144 },
    { "epoch": 0.5272727272727272, "grad_norm": 1.7556527853012085, "learning_rate": 4.387096774193549e-05, "loss": 0.0418, "step": 145 },
    { "epoch": 0.5309090909090909, "grad_norm": 0.7867715358734131, "learning_rate": 4.380645161290323e-05, "loss": 0.0097, "step": 146 },
    { "epoch": 0.5345454545454545, "grad_norm": 1.7784496545791626, "learning_rate": 4.374193548387097e-05, "loss": 0.0308, "step": 147 },
    { "epoch": 0.5381818181818182, "grad_norm": 3.104606866836548, "learning_rate": 4.367741935483871e-05, "loss": 0.0512, "step": 148 },
    { "epoch": 0.5418181818181819, "grad_norm": 1.6434437036514282, "learning_rate": 4.361290322580645e-05, "loss": 0.086, "step": 149 },
    { "epoch": 0.5454545454545454, "grad_norm": 3.0264408588409424, "learning_rate": 4.3548387096774194e-05, "loss": 0.0293, "step": 150 },
    { "epoch": 0.5490909090909091, "grad_norm": 0.4782671332359314, "learning_rate": 4.3483870967741936e-05, "loss": 0.0107, "step": 151 },
    { "epoch": 0.5527272727272727, "grad_norm": 3.8326480388641357, "learning_rate": 4.3419354838709684e-05, "loss": 0.1158, "step": 152 },
    { "epoch": 0.5563636363636364, "grad_norm": 2.803429126739502, "learning_rate": 4.335483870967742e-05, "loss": 0.0907, "step": 153 },
    { "epoch": 0.56, "grad_norm": 6.267507076263428, "learning_rate": 4.3290322580645166e-05, "loss": 0.0712, "step": 154 },
    { "epoch": 0.5636363636363636, "grad_norm": 2.4244847297668457, "learning_rate": 4.322580645161291e-05, "loss": 0.0567, "step": 155 },
    { "epoch": 0.5672727272727273, "grad_norm": 1.6119318008422852, "learning_rate": 4.316129032258065e-05, "loss": 0.166, "step": 156 },
    { "epoch": 0.5709090909090909, "grad_norm": 2.521883964538574, "learning_rate": 4.309677419354839e-05, "loss": 0.0308, "step": 157 },
    { "epoch": 0.5745454545454546, "grad_norm": 2.1113216876983643, "learning_rate": 4.303225806451613e-05, "loss": 0.0302, "step": 158 },
    { "epoch": 0.5781818181818181, "grad_norm": 1.94487726688385, "learning_rate": 4.296774193548387e-05, "loss": 0.0199, "step": 159 },
    { "epoch": 0.5818181818181818, "grad_norm": 3.142296552658081, "learning_rate": 4.2903225806451614e-05, "loss": 0.0744, "step": 160 },
    { "epoch": 0.5854545454545454, "grad_norm": 1.6849907636642456, "learning_rate": 4.283870967741936e-05, "loss": 0.0321, "step": 161 },
    { "epoch": 0.5890909090909091, "grad_norm": 5.332469463348389, "learning_rate": 4.2774193548387096e-05, "loss": 0.1344, "step": 162 },
    { "epoch": 0.5927272727272728, "grad_norm": 0.8968711495399475, "learning_rate": 4.2709677419354844e-05, "loss": 0.073, "step": 163 },
    { "epoch": 0.5963636363636363, "grad_norm": 5.70121955871582, "learning_rate": 4.2645161290322585e-05, "loss": 0.1092, "step": 164 },
    { "epoch": 0.6, "grad_norm": 1.3494465351104736, "learning_rate": 4.258064516129032e-05, "loss": 0.0153, "step": 165 },
    { "epoch": 0.6036363636363636, "grad_norm": 1.2071605920791626, "learning_rate": 4.251612903225807e-05, "loss": 0.0636, "step": 166 },
    { "epoch": 0.6072727272727273, "grad_norm": 2.2756154537200928, "learning_rate": 4.245161290322581e-05, "loss": 0.0523, "step": 167 },
    { "epoch": 0.610909090909091, "grad_norm": 2.9887783527374268, "learning_rate": 4.238709677419355e-05, "loss": 0.0991, "step": 168 },
    { "epoch": 0.6145454545454545, "grad_norm": 3.471949577331543, "learning_rate": 4.232258064516129e-05, "loss": 0.1146, "step": 169 },
    { "epoch": 0.6181818181818182, "grad_norm": 0.9178719520568848, "learning_rate": 4.225806451612904e-05, "loss": 0.0283, "step": 170 },
    { "epoch": 0.6218181818181818, "grad_norm": 1.7535454034805298, "learning_rate": 4.2193548387096774e-05, "loss": 0.1037, "step": 171 },
    { "epoch": 0.6254545454545455, "grad_norm": 0.523720383644104, "learning_rate": 4.2129032258064515e-05, "loss": 0.0188, "step": 172 },
    { "epoch": 0.6290909090909091, "grad_norm": 2.872236728668213, "learning_rate": 4.206451612903226e-05, "loss": 0.137, "step": 173 },
    { "epoch": 0.6327272727272727, "grad_norm": 1.1402463912963867, "learning_rate": 4.2e-05, "loss": 0.0469, "step": 174 },
    { "epoch": 0.6363636363636364, "grad_norm": 1.3822078704833984, "learning_rate": 4.1935483870967746e-05, "loss": 0.0299, "step": 175 },
    { "epoch": 0.64, "grad_norm": 1.6050485372543335, "learning_rate": 4.187096774193549e-05, "loss": 0.0313, "step": 176 },
    { "epoch": 0.6436363636363637, "grad_norm": 1.4865597486495972, "learning_rate": 4.180645161290323e-05, "loss": 0.0199, "step": 177 },
    { "epoch": 0.6472727272727272, "grad_norm": 1.0985281467437744, "learning_rate": 4.174193548387097e-05, "loss": 0.0161, "step": 178 },
    { "epoch": 0.6509090909090909, "grad_norm": 2.7019283771514893, "learning_rate": 4.167741935483871e-05, "loss": 0.1127, "step": 179 },
    { "epoch": 0.6545454545454545, "grad_norm": 0.20963142812252045, "learning_rate": 4.161290322580645e-05, "loss": 0.0069, "step": 180 },
    { "epoch": 0.6581818181818182, "grad_norm": 2.687619209289551, "learning_rate": 4.154838709677419e-05, "loss": 0.0189, "step": 181 },
    { "epoch": 0.6618181818181819, "grad_norm": 0.5330418348312378, "learning_rate": 4.148387096774194e-05, "loss": 0.0878, "step": 182 },
    { "epoch": 0.6654545454545454, "grad_norm": 6.277083873748779, "learning_rate": 4.1419354838709676e-05, "loss": 0.2121, "step": 183 },
    { "epoch": 0.6690909090909091, "grad_norm": 4.049676895141602, "learning_rate": 4.1354838709677424e-05, "loss": 0.069, "step": 184 },
    { "epoch": 0.6727272727272727, "grad_norm": 2.069470167160034, "learning_rate": 4.1290322580645165e-05, "loss": 0.0485, "step": 185 },
    { "epoch": 0.6763636363636364, "grad_norm": 5.907510280609131, "learning_rate": 4.1225806451612906e-05, "loss": 0.0886, "step": 186 },
    { "epoch": 0.68, "grad_norm": 2.1604321002960205, "learning_rate": 4.116129032258065e-05, "loss": 0.0116, "step": 187 },
    { "epoch": 0.6836363636363636, "grad_norm": 2.6261606216430664, "learning_rate": 4.109677419354839e-05, "loss": 0.0438, "step": 188 },
    { "epoch": 0.6872727272727273, "grad_norm": 2.597719669342041, "learning_rate": 4.103225806451613e-05, "loss": 0.1081, "step": 189 },
    { "epoch": 0.6909090909090909, "grad_norm": 0.45269355177879333, "learning_rate": 4.096774193548387e-05, "loss": 0.0057, "step": 190 },
    { "epoch": 0.6945454545454546, "grad_norm": 3.5486068725585938, "learning_rate": 4.090322580645162e-05, "loss": 0.0569, "step": 191 },
    { "epoch": 0.6981818181818182, "grad_norm": 2.485374927520752, "learning_rate": 4.0838709677419354e-05, "loss": 0.1183, "step": 192 },
    { "epoch": 0.7018181818181818, "grad_norm": 4.209328651428223, "learning_rate": 4.0774193548387095e-05, "loss": 0.1006, "step": 193 },
    { "epoch": 0.7054545454545454, "grad_norm": 1.0859043598175049, "learning_rate": 4.070967741935484e-05, "loss": 0.0117, "step": 194 },
    { "epoch": 0.7090909090909091, "grad_norm": 1.0647627115249634, "learning_rate": 4.0645161290322584e-05, "loss": 0.0618, "step": 195 },
    { "epoch": 0.7127272727272728, "grad_norm": 1.131689429283142, "learning_rate": 4.0580645161290325e-05, "loss": 0.0439, "step": 196 },
    { "epoch": 0.7163636363636363, "grad_norm": 3.031101942062378, "learning_rate": 4.0516129032258067e-05, "loss": 0.1021, "step": 197 },
    { "epoch": 0.72, "grad_norm": 0.11612915247678757, "learning_rate": 4.045161290322581e-05, "loss": 0.0048, "step": 198 },
    { "epoch": 0.7236363636363636, "grad_norm": 1.4697656631469727, "learning_rate": 4.038709677419355e-05, "loss": 0.0296, "step": 199 },
    { "epoch": 0.7272727272727273, "grad_norm": 2.3507072925567627, "learning_rate": 4.032258064516129e-05, "loss": 0.0975, "step": 200 },
    { "epoch": 0.730909090909091, "grad_norm": 3.2952606678009033, "learning_rate": 4.025806451612903e-05, "loss": 0.0741, "step": 201 },
    { "epoch": 0.7345454545454545, "grad_norm": 1.3989083766937256, "learning_rate": 4.019354838709677e-05, "loss": 0.0603, "step": 202 },
    { "epoch": 0.7381818181818182, "grad_norm": 0.7519178986549377, "learning_rate": 4.012903225806452e-05, "loss": 0.0195, "step": 203 },
    { "epoch": 0.7418181818181818, "grad_norm": 0.9893004298210144, "learning_rate": 4.006451612903226e-05, "loss": 0.0316, "step": 204 },
    { "epoch": 0.7454545454545455, "grad_norm": 2.2764501571655273, "learning_rate": 4e-05, "loss": 0.0347, "step": 205 },
    { "epoch": 0.7490909090909091, "grad_norm": 2.6084823608398438, "learning_rate": 3.9935483870967745e-05, "loss": 0.1071, "step": 206 },
    { "epoch": 0.7527272727272727, "grad_norm": 1.7547663450241089, "learning_rate": 3.9870967741935486e-05, "loss": 0.0181, "step": 207 },
    { "epoch": 0.7563636363636363, "grad_norm": 0.3479810953140259, "learning_rate": 3.980645161290323e-05, "loss": 0.0101, "step": 208 },
    { "epoch": 0.76, "grad_norm": 1.399048924446106, "learning_rate": 3.974193548387097e-05, "loss": 0.0244, "step": 209 },
    { "epoch": 0.7636363636363637, "grad_norm": 3.616453170776367, "learning_rate": 3.9677419354838716e-05, "loss": 0.0863, "step": 210 },
    { "epoch": 0.7672727272727272, "grad_norm": 2.61297345161438, "learning_rate": 3.961290322580645e-05, "loss": 0.0447, "step": 211 },
    { "epoch": 0.7709090909090909, "grad_norm": 1.899042010307312, "learning_rate": 3.95483870967742e-05, "loss": 0.0313, "step": 212 },
    { "epoch": 0.7745454545454545, "grad_norm": 0.9543875455856323, "learning_rate": 3.948387096774194e-05, "loss": 0.0243, "step": 213 },
    { "epoch": 0.7781818181818182, "grad_norm": 1.5872313976287842, "learning_rate": 3.9419354838709674e-05, "loss": 0.0122, "step": 214 },
    { "epoch": 0.7818181818181819, "grad_norm": 1.8551832437515259, "learning_rate": 3.935483870967742e-05, "loss": 0.0535, "step": 215 },
    { "epoch": 0.7854545454545454, "grad_norm": 3.93450927734375, "learning_rate": 3.9290322580645164e-05, "loss": 0.1321, "step": 216 },
    { "epoch": 0.7890909090909091, "grad_norm": 4.761463165283203, "learning_rate": 3.9225806451612905e-05, "loss": 0.142, "step": 217 },
    { "epoch": 0.7927272727272727, "grad_norm": 1.0365723371505737, "learning_rate": 3.9161290322580646e-05, "loss": 0.0197, "step": 218 },
    { "epoch": 0.7963636363636364, "grad_norm": 2.633030414581299, "learning_rate": 3.9096774193548394e-05, "loss": 0.0375, "step": 219 },
    { "epoch": 0.8, "grad_norm": 1.4770023822784424, "learning_rate": 3.903225806451613e-05, "loss": 0.0251, "step": 220 },
    { "epoch": 0.8036363636363636, "grad_norm": 2.1788926124572754, "learning_rate": 3.896774193548387e-05, "loss": 0.0146, "step": 221 },
    { "epoch": 0.8072727272727273, "grad_norm": 1.1818387508392334, "learning_rate": 3.890322580645162e-05, "loss": 0.0307, "step": 222 },
    { "epoch": 0.8109090909090909, "grad_norm": 1.7780365943908691, "learning_rate": 3.883870967741935e-05, "loss": 0.0436, "step": 223 },
    { "epoch": 0.8145454545454546, "grad_norm": 0.3603725731372833, "learning_rate": 3.87741935483871e-05, "loss": 0.0086, "step": 224 },
    { "epoch": 0.8181818181818182, "grad_norm": 2.094778060913086, "learning_rate": 3.870967741935484e-05, "loss": 0.0535, "step": 225 },
    { "epoch": 0.8218181818181818, "grad_norm": 1.7837364673614502, "learning_rate": 3.864516129032258e-05, "loss": 0.0324, "step": 226 },
    { "epoch": 0.8254545454545454, "grad_norm": 1.0126112699508667, "learning_rate": 3.8580645161290324e-05, "loss": 0.0195, "step": 227 },
    { "epoch": 0.8290909090909091, "grad_norm": 2.095411777496338, "learning_rate": 3.8516129032258065e-05, "loss": 0.0858, "step": 228 },
    { "epoch": 0.8327272727272728, "grad_norm": 2.12591290473938, "learning_rate": 3.845161290322581e-05, "loss": 0.0745, "step": 229 },
    { "epoch": 0.8363636363636363, "grad_norm": 4.18102502822876, "learning_rate": 3.838709677419355e-05, "loss": 0.0876, "step": 230 },
    { "epoch": 0.84, "grad_norm": 2.6359128952026367, "learning_rate": 3.8322580645161296e-05, "loss": 0.0261, "step": 231 },
    { "epoch": 0.8436363636363636, "grad_norm": 2.9271247386932373, "learning_rate": 3.825806451612903e-05, "loss": 0.0432, "step": 232 },
    { "epoch": 0.8472727272727273, "grad_norm": 4.78306245803833, "learning_rate": 3.819354838709678e-05, "loss": 0.1117, "step": 233 },
    { "epoch": 0.850909090909091, "grad_norm": 0.5705394148826599, "learning_rate": 3.812903225806452e-05, "loss": 0.0054, "step": 234 },
    { "epoch": 0.8545454545454545, "grad_norm": 2.0522520542144775, "learning_rate": 3.8064516129032254e-05, "loss": 0.0245, "step": 235 },
    { "epoch": 0.8581818181818182, "grad_norm": 2.7212295532226562, "learning_rate": 3.8e-05, "loss": 0.0252, "step": 236 },
    { "epoch": 0.8618181818181818, "grad_norm": 0.7977845072746277, "learning_rate": 3.793548387096774e-05, "loss": 0.0127, "step": 237 },
    { "epoch": 0.8654545454545455, "grad_norm": 3.720811605453491, "learning_rate": 3.7870967741935485e-05, "loss": 0.0465, "step": 238 },
    { "epoch": 0.8690909090909091, "grad_norm": 2.362733840942383, "learning_rate": 3.7806451612903226e-05, "loss": 0.0266, "step": 239 },
    { "epoch": 0.8727272727272727, "grad_norm": 2.5481631755828857, "learning_rate": 3.7741935483870974e-05, "loss": 0.0224, "step": 240 },
    { "epoch": 0.8763636363636363, "grad_norm": 2.1781375408172607, "learning_rate": 3.767741935483871e-05, "loss": 0.0228, "step": 241 },
    { "epoch": 0.88, "grad_norm": 1.9818776845932007, "learning_rate": 3.761290322580645e-05, "loss": 0.0231, "step": 242 },
    { "epoch": 0.8836363636363637, "grad_norm": 0.20954543352127075, "learning_rate": 3.75483870967742e-05, "loss": 0.0031, "step": 243 },
    { "epoch": 0.8872727272727273, "grad_norm": 2.769566535949707, "learning_rate": 3.748387096774193e-05, "loss": 0.039, "step": 244 },
    { "epoch": 0.8909090909090909, "grad_norm": 2.511801242828369, "learning_rate": 3.741935483870968e-05, "loss": 0.0154, "step": 245 },
    { "epoch": 0.8945454545454545, "grad_norm": 0.3851822018623352, "learning_rate": 3.735483870967742e-05, "loss": 0.0047, "step": 246 },
    { "epoch": 0.8981818181818182, "grad_norm": 1.6585170030593872, "learning_rate": 3.729032258064516e-05, "loss": 0.0882, "step": 247 },
    { "epoch": 0.9018181818181819, "grad_norm": 1.9124457836151123, "learning_rate": 3.7225806451612904e-05, "loss": 0.0138, "step": 248 },
    { "epoch": 0.9054545454545454, "grad_norm": 5.743299961090088, "learning_rate": 3.716129032258065e-05, "loss": 0.0494, "step": 249 },
    { "epoch": 0.9090909090909091, "grad_norm": 3.5978081226348877, "learning_rate": 3.7096774193548386e-05, "loss": 0.0182, "step": 250 },
    { "epoch": 0.9127272727272727, "grad_norm": 0.024638742208480835, "learning_rate": 3.703225806451613e-05, "loss": 0.0012, "step": 251 },
    { "epoch": 0.9163636363636364, "grad_norm": 0.04621901735663414, "learning_rate": 3.6967741935483876e-05, "loss": 0.0014, "step": 252 },
    { "epoch": 0.92, "grad_norm": 0.06507572531700134, "learning_rate": 3.690322580645162e-05, "loss": 0.0018, "step": 253 },
    { "epoch": 0.9236363636363636, "grad_norm": 0.683228075504303, "learning_rate": 3.683870967741936e-05, "loss": 0.0064, "step": 254 },
    { "epoch": 0.9272727272727272, "grad_norm": 9.123976707458496, "learning_rate": 3.67741935483871e-05, "loss": 0.087, "step": 255 },
    { "epoch": 0.9309090909090909, "grad_norm": 0.9182856678962708, "learning_rate": 3.670967741935484e-05, "loss": 0.0891, "step": 256 },
    { "epoch": 0.9345454545454546, "grad_norm": 4.344281196594238, "learning_rate": 3.664516129032258e-05, "loss": 0.0301, "step": 257 },
    { "epoch": 0.9381818181818182, "grad_norm": 0.22626306116580963, "learning_rate": 3.658064516129032e-05, "loss": 0.0027, "step": 258 },
    { "epoch": 0.9418181818181818, "grad_norm": 2.9443519115448, "learning_rate": 3.6516129032258064e-05, "loss": 0.0826, "step": 259 },
    { "epoch": 0.9454545454545454, "grad_norm": 0.35616394877433777, "learning_rate": 3.6451612903225805e-05, "loss": 0.003, "step": 260 },
    { "epoch": 0.9490909090909091, "grad_norm": 5.846389293670654, "learning_rate": 3.6387096774193553e-05, "loss": 0.1148, "step": 261 },
    { "epoch": 0.9527272727272728, "grad_norm": 0.351965069770813, "learning_rate": 3.6322580645161295e-05, "loss": 0.0054, "step": 262 },
    { "epoch": 0.9563636363636364, "grad_norm": 1.9264423847198486, "learning_rate": 3.6258064516129036e-05, "loss": 0.0241, "step": 263 },
    { "epoch": 0.96, "grad_norm": 7.070173263549805, "learning_rate": 3.619354838709678e-05, "loss": 0.1209, "step": 264 },
    { "epoch": 0.9636363636363636, "grad_norm": 4.125431537628174, "learning_rate": 3.612903225806452e-05, "loss": 0.0451, "step": 265 },
    { "epoch": 0.9672727272727273, "grad_norm": 0.08269821852445602, "learning_rate": 3.606451612903226e-05, "loss": 0.0024, "step": 266 },
    { "epoch": 0.9709090909090909, "grad_norm": 2.2969746589660645, "learning_rate": 3.6e-05, "loss": 0.03, "step": 267 },
    { "epoch": 0.9745454545454545, "grad_norm": 3.2518858909606934, "learning_rate": 3.593548387096774e-05, "loss": 0.0242, "step": 268 },
    { "epoch": 0.9781818181818182, "grad_norm": 0.2708548903465271, "learning_rate": 3.5870967741935483e-05, "loss": 0.0041, "step": 269 },
    { "epoch": 0.9818181818181818, "grad_norm": 3.211353063583374, "learning_rate": 3.580645161290323e-05, "loss": 0.0195, "step": 270 },
    { "epoch": 0.9854545454545455, "grad_norm": 2.5400710105895996, "learning_rate": 3.574193548387097e-05, "loss": 0.023, "step": 271 },
    { "epoch": 0.9890909090909091, "grad_norm": 4.467423915863037, "learning_rate": 3.567741935483871e-05, "loss": 0.1522, "step": 272 },
    { "epoch": 0.9927272727272727, "grad_norm": 0.29972556233406067, "learning_rate": 3.5612903225806455e-05, "loss": 0.0033, "step": 273 },
    { "epoch": 0.9963636363636363, "grad_norm": 2.243041753768921, "learning_rate": 3.5548387096774196e-05, "loss": 0.0159, "step": 274 },
    { "epoch": 1.0, "grad_norm": 0.03446698188781738, "learning_rate": 3.548387096774194e-05, "loss": 0.0015, "step": 275 },
    { "epoch": 1.0, "eval_accuracy": 0.9890710382513661, "eval_loss": 0.032855160534381866, "eval_runtime": 12.8051, "eval_samples_per_second": 342.989, "eval_steps_per_second": 5.388, "step": 275 },
    { "epoch": 1.0036363636363637, "grad_norm": 0.1759863644838333, "learning_rate": 3.541935483870968e-05, "loss": 0.0025, "step": 276 },
    { "epoch": 1.0072727272727273, "grad_norm": 0.30346181988716125, "learning_rate": 3.535483870967743e-05, "loss": 0.0035, "step": 277 },
    { "epoch": 1.010909090909091, "grad_norm": 0.5249446630477905, "learning_rate": 3.529032258064516e-05, "loss": 0.0071, "step": 278 },
    { "epoch": 1.0145454545454546, "grad_norm": 2.6401705741882324, "learning_rate": 3.52258064516129e-05, "loss": 0.0947, "step": 279 },
    { "epoch": 1.018181818181818, "grad_norm": 1.9211256504058838, "learning_rate": 3.516129032258065e-05, "loss": 0.0223, "step": 280 },
    { "epoch": 1.0218181818181817, "grad_norm": 0.13240855932235718, "learning_rate": 3.5096774193548385e-05, "loss": 0.0024, "step": 281 },
    { "epoch": 1.0254545454545454, "grad_norm": 1.043957233428955, "learning_rate": 3.503225806451613e-05, "loss": 0.0082, "step": 282 },
    { "epoch": 1.029090909090909, "grad_norm": 1.6595826148986816, "learning_rate": 3.4967741935483874e-05, "loss": 0.0711, "step": 283 },
    { "epoch": 1.0327272727272727, "grad_norm": 0.50986248254776, "learning_rate": 3.4903225806451616e-05, "loss": 0.0043, "step": 284 },
    { "epoch": 1.0363636363636364, "grad_norm": 2.219081401824951, "learning_rate": 3.483870967741936e-05, "loss": 0.0776, "step": 285 },
    { "epoch": 1.04, "grad_norm": 0.7167057991027832, "learning_rate": 3.47741935483871e-05, "loss": 0.005, "step": 286 },
    { "epoch": 1.0436363636363637, "grad_norm": 0.13360421359539032, "learning_rate": 3.470967741935484e-05, "loss": 0.0023, "step": 287 },
    { "epoch": 1.0472727272727274, "grad_norm": 1.3663884401321411, "learning_rate": 3.464516129032258e-05, "loss": 0.0111, "step": 288 },
    { "epoch": 1.050909090909091, "grad_norm": 0.01989702135324478, "learning_rate": 3.458064516129033e-05, "loss": 0.001, "step": 289 },
    { "epoch": 1.0545454545454545, "grad_norm": 1.4087766408920288, "learning_rate": 3.451612903225806e-05, "loss": 0.014, "step": 290 },
    { "epoch": 1.0581818181818181, "grad_norm": 0.47182703018188477, "learning_rate": 3.445161290322581e-05, "loss": 0.0047, "step": 291 },
    { "epoch": 1.0618181818181818, "grad_norm": 0.7205504775047302, "learning_rate": 3.438709677419355e-05, "loss": 0.0045, "step": 292 },
    { "epoch": 1.0654545454545454, "grad_norm": 0.07702212780714035, "learning_rate": 3.432258064516129e-05, "loss": 0.002, "step": 293 },
    { "epoch": 1.069090909090909, "grad_norm": 0.44914644956588745, "learning_rate": 3.4258064516129035e-05, "loss": 0.0044, "step": 294 },
    { "epoch": 1.0727272727272728, "grad_norm": 0.26627811789512634, "learning_rate": 3.4193548387096776e-05, "loss": 0.0035, "step": 295 },
    { "epoch": 1.0763636363636364, "grad_norm": 1.8016518354415894, "learning_rate": 3.412903225806452e-05, "loss": 0.012, "step": 296 },
    { "epoch": 1.08, "grad_norm": 2.806971311569214, "learning_rate": 3.406451612903226e-05, "loss": 0.051, "step": 297 },
    { "epoch": 1.0836363636363637, "grad_norm": 0.03818153962492943, "learning_rate": 3.4000000000000007e-05, "loss": 0.0014, "step": 298 },
    { "epoch": 1.0872727272727274, "grad_norm": 0.04983190819621086, "learning_rate": 3.393548387096774e-05, "loss": 0.0016, "step": 299 },
    { "epoch": 1.0909090909090908, "grad_norm": 0.05590420216321945, "learning_rate": 3.387096774193548e-05, "loss": 0.0013, "step": 300 },
    { "epoch": 1.0945454545454545, "grad_norm": 0.3262059986591339, "learning_rate": 3.380645161290323e-05, "loss": 0.0024, "step": 301 },
    { "epoch": 1.0981818181818181, "grad_norm": 0.10000384598970413, "learning_rate": 3.3741935483870965e-05, "loss": 0.0016, "step": 302 },
    { "epoch": 1.1018181818181818, "grad_norm": 1.12041437625885, "learning_rate": 3.367741935483871e-05, "loss": 0.0116, "step": 303 },
    { "epoch": 1.1054545454545455, "grad_norm": 3.5186989307403564, "learning_rate": 3.3612903225806454e-05, "loss": 0.0688, "step": 304 },
    { "epoch": 1.1090909090909091, "grad_norm": 0.599452018737793, "learning_rate": 3.3548387096774195e-05, "loss": 0.004, "step": 305 },
    { "epoch": 1.1127272727272728, "grad_norm": 4.202869415283203, "learning_rate": 3.3483870967741936e-05, "loss": 0.0139, "step": 306 },
    { "epoch": 1.1163636363636364, "grad_norm": 0.8411999344825745, "learning_rate": 3.341935483870968e-05, "loss": 0.0053, "step": 307 },
    { "epoch": 1.12, "grad_norm": 0.222166046500206, "learning_rate": 3.335483870967742e-05, "loss": 0.0016, "step": 308 },
    { "epoch": 1.1236363636363635, "grad_norm": 0.7113834023475647, "learning_rate": 3.329032258064516e-05, "loss": 0.0049, "step": 309 },
    { "epoch": 1.1272727272727272, "grad_norm": 0.022323666140437126, "learning_rate": 3.322580645161291e-05, "loss": 0.0008, "step": 310 },
    { "epoch": 1.1309090909090909, "grad_norm": 0.7203249335289001, "learning_rate": 3.316129032258064e-05, "loss": 0.0047, "step": 311 },
    { "epoch": 1.1345454545454545, "grad_norm": 0.20508606731891632, "learning_rate": 3.309677419354839e-05, "loss": 0.0016, "step": 312 },
    { "epoch": 1.1381818181818182, "grad_norm": 0.1757669299840927, "learning_rate": 3.303225806451613e-05, "loss": 0.0012, "step": 313 },
    { "epoch": 1.1418181818181818, "grad_norm": 0.6888485550880432, "learning_rate": 3.296774193548387e-05, "loss": 0.1091, "step": 314 },
    { "epoch": 1.1454545454545455, "grad_norm": 3.370866298675537, "learning_rate": 3.2903225806451614e-05, "loss": 0.0273, "step": 315 },
    { "epoch": 1.1490909090909092, "grad_norm": 2.7193377017974854, "learning_rate": 3.2838709677419356e-05, "loss": 0.1505, "step": 316 },
    { "epoch": 1.1527272727272728, "grad_norm": 0.01697608083486557, "learning_rate": 3.27741935483871e-05, "loss": 0.0007, "step": 317 },
    { "epoch": 1.1563636363636363, "grad_norm": 1.244520902633667, "learning_rate": 3.270967741935484e-05, "loss": 0.0076, "step": 318 },
    { "epoch": 1.16, "grad_norm": 0.18265090882778168, "learning_rate": 3.2645161290322586e-05, "loss": 0.0018, "step": 319 },
    { "epoch": 1.1636363636363636, "grad_norm": 0.03304216265678406, "learning_rate": 3.258064516129033e-05, "loss": 0.0008, "step": 320 },
    { "epoch": 1.1672727272727272, "grad_norm": 1.4162017107009888, "learning_rate": 3.251612903225806e-05, "loss": 0.0071, "step": 321 },
    { "epoch": 1.170909090909091, "grad_norm": 0.0734863430261612, "learning_rate": 3.245161290322581e-05, "loss": 0.0012, "step": 322 },
    { "epoch": 1.1745454545454546, "grad_norm": 4.381263256072998, "learning_rate": 3.238709677419355e-05, "loss": 0.0188, "step": 323 },
    { "epoch": 1.1781818181818182, "grad_norm": 2.71781325340271, "learning_rate": 3.232258064516129e-05, "loss": 0.0278, "step": 324 },
    { "epoch": 1.1818181818181819, "grad_norm": 0.1261809766292572, "learning_rate": 3.2258064516129034e-05, "loss": 0.0014, "step": 325 },
    { "epoch": 1.1854545454545455, "grad_norm": 2.4839842319488525, "learning_rate": 3.2193548387096775e-05, "loss": 0.072, "step": 326 },
    { "epoch": 1.189090909090909, "grad_norm": 0.1383955031633377, "learning_rate": 3.2129032258064516e-05, "loss": 0.0016, "step": 327 },
    { "epoch": 1.1927272727272726, "grad_norm": 3.2481300830841064, "learning_rate": 3.206451612903226e-05, "loss": 0.0301, "step": 328 },
    { "epoch": 1.1963636363636363, "grad_norm": 4.403379440307617, "learning_rate": 3.2000000000000005e-05, "loss": 0.0323, "step": 329 },
    { "epoch": 1.2, "grad_norm": 0.015226184390485287, "learning_rate": 3.193548387096774e-05, "loss": 0.0007, "step": 330 },
    { "epoch": 1.2036363636363636, "grad_norm": 0.37973034381866455, "learning_rate": 3.187096774193549e-05, "loss": 0.0028, "step": 331 },
    { "epoch": 1.2072727272727273, "grad_norm": 6.127589225769043, "learning_rate": 3.180645161290323e-05, "loss": 0.0691, "step": 332 },
    { "epoch": 1.210909090909091, "grad_norm": 0.8358224034309387, "learning_rate": 3.174193548387097e-05, "loss": 0.1013, "step": 333 },
    { "epoch": 1.2145454545454546, "grad_norm": 0.036338452249765396, "learning_rate": 3.167741935483871e-05, "loss": 0.0011, "step": 334 },
    { "epoch": 1.2181818181818183, "grad_norm": 0.02295631729066372, "learning_rate": 3.161290322580645e-05, "loss": 0.0009, "step": 335 },
    { "epoch": 1.221818181818182, "grad_norm": 0.05319954827427864, "learning_rate": 3.1548387096774194e-05, "loss": 0.0012, "step": 336 },
    { "epoch": 1.2254545454545456, "grad_norm": 6.075497150421143, "learning_rate": 3.1483870967741935e-05, "loss": 0.1689, "step": 337 },
    { "epoch": 1.229090909090909, "grad_norm": 0.02964276447892189, "learning_rate": 3.141935483870968e-05, "loss": 0.0012, "step": 338 },
    { "epoch": 1.2327272727272727, "grad_norm": 3.0539228916168213, "learning_rate": 3.135483870967742e-05, "loss": 0.0269, "step": 339 },
    { "epoch": 1.2363636363636363, "grad_norm": 0.04519687220454216, "learning_rate": 3.1290322580645166e-05, "loss": 0.0017, "step": 340 },
    { "epoch": 1.24, "grad_norm": 0.10507076978683472, "learning_rate": 3.122580645161291e-05, "loss": 0.0027, "step": 341 },
    { "epoch": 1.2436363636363637, "grad_norm": 3.865663766860962, "learning_rate": 3.116129032258064e-05, "loss": 0.0435, "step": 342 },
    { "epoch": 1.2472727272727273, "grad_norm": 1.0297927856445312, "learning_rate": 3.109677419354839e-05, "loss": 0.0093, "step": 343 },
    { "epoch": 1.250909090909091, "grad_norm": 1.1631922721862793, "learning_rate": 3.103225806451613e-05, "loss": 0.0073, "step": 344 },
    { "epoch": 1.2545454545454544, "grad_norm": 0.11844321340322495, "learning_rate": 3.096774193548387e-05, "loss": 0.0022, "step": 345 },
    { "epoch": 1.2581818181818183, "grad_norm": 0.6237489581108093, "learning_rate": 3.090322580645161e-05, "loss": 0.004, "step": 346 },
    { "epoch": 1.2618181818181817, "grad_norm": 0.883538544178009, "learning_rate": 3.083870967741936e-05, "loss": 0.0056, "step": 347 },
    { "epoch": 1.2654545454545454, "grad_norm": 0.5708529949188232, "learning_rate": 3.0774193548387096e-05, "loss": 0.0051, "step": 348 },
    { "epoch": 1.269090909090909, "grad_norm": 0.3859453797340393, "learning_rate": 3.070967741935484e-05, "loss": 0.0026, "step": 349 },
    { "epoch": 1.2727272727272727, "grad_norm": 0.07389602810144424, "learning_rate": 3.0645161290322585e-05, "loss": 0.0018, "step": 350 },
    { "epoch": 1.2763636363636364, "grad_norm": 5.179856300354004, "learning_rate": 3.058064516129032e-05, "loss": 0.1931, "step": 351 },
    { "epoch": 1.28, "grad_norm": 1.7609326839447021, "learning_rate": 3.0516129032258067e-05, "loss": 0.0627, "step": 352 },
    { "epoch": 1.2836363636363637, "grad_norm": 2.443671464920044, "learning_rate": 3.0451612903225805e-05, "loss": 0.0098, "step": 353 },
    { "epoch": 1.2872727272727273, "grad_norm": 0.024096990004181862, "learning_rate": 3.0387096774193553e-05, "loss": 0.001, "step": 354 },
    { "epoch": 1.290909090909091, "grad_norm": 0.03544619679450989, "learning_rate": 3.032258064516129e-05, "loss": 0.0013, "step": 355 },
    { "epoch": 1.2945454545454544, "grad_norm": 0.8081026673316956, "learning_rate": 3.0258064516129032e-05, "loss": 0.0049, "step": 356 },
    { "epoch": 1.298181818181818, "grad_norm": 0.13682828843593597, "learning_rate": 3.0193548387096777e-05, "loss": 0.0025, "step": 357 },
    { "epoch": 1.3018181818181818, "grad_norm": 0.04892565682530403, "learning_rate": 3.0129032258064515e-05, "loss": 0.0012, "step": 358 },
    { "epoch": 1.3054545454545454, "grad_norm": 0.04417359083890915, "learning_rate": 3.006451612903226e-05, "loss": 0.0009, "step": 359 },
    { "epoch": 1.309090909090909, "grad_norm": 1.1065845489501953, "learning_rate": 3e-05, "loss": 0.0099, "step": 360 },
    { "epoch": 1.3127272727272727, "grad_norm": 1.6691566705703735, "learning_rate": 2.9935483870967745e-05, "loss": 0.0704, "step": 361 },
    { "epoch": 1.3163636363636364, "grad_norm": 0.744577944278717, "learning_rate": 2.9870967741935487e-05, "loss": 0.0546, "step": 362 },
    { "epoch": 1.32, "grad_norm": 1.7217384576797485, "learning_rate": 2.9806451612903224e-05, "loss": 0.11, "step": 363 },
    { "epoch": 1.3236363636363637, "grad_norm": 0.28095120191574097, "learning_rate": 2.974193548387097e-05, "loss": 0.0035, "step": 364 },
    { "epoch": 1.3272727272727272, "grad_norm": 0.25743359327316284, "learning_rate": 2.967741935483871e-05, "loss": 0.0026, "step": 365 },
    { "epoch": 1.330909090909091, "grad_norm": 0.04120393097400665, "learning_rate": 2.9612903225806455e-05, "loss": 0.0012, "step": 366 },
    { "epoch": 1.3345454545454545, "grad_norm": 0.08902228623628616, "learning_rate": 2.9548387096774193e-05, "loss": 0.0026, "step": 367 },
    { "epoch": 1.3381818181818181, "grad_norm": 2.2871615886688232, "learning_rate": 2.9483870967741937e-05, "loss": 0.0267, "step": 368 },
    { "epoch": 1.3418181818181818, "grad_norm": 0.9016657471656799, "learning_rate": 2.941935483870968e-05, "loss": 0.127, "step": 369 },
    { "epoch": 1.3454545454545455, "grad_norm": 3.976222276687622, "learning_rate": 2.9354838709677417e-05, "loss": 0.0278, "step": 370 },
    { "epoch": 1.3490909090909091, "grad_norm": 0.11579588800668716, "learning_rate": 2.9290322580645165e-05, "loss": 0.0027, "step": 371 },
    { "epoch": 1.3527272727272728, "grad_norm": 2.413121223449707, "learning_rate": 2.9225806451612902e-05, "loss": 0.0107, "step": 372 },
    { "epoch": 1.3563636363636364, "grad_norm": 1.4316357374191284, "learning_rate": 2.9161290322580647e-05, "loss": 0.0093, "step": 373 },
{ |
|
"epoch": 1.3599999999999999, |
|
"grad_norm": 0.041745781898498535, |
|
"learning_rate": 2.909677419354839e-05, |
|
"loss": 0.0015, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.3636363636363638, |
|
"grad_norm": 0.59097820520401, |
|
"learning_rate": 2.9032258064516133e-05, |
|
"loss": 0.0034, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.3672727272727272, |
|
"grad_norm": 0.06494897603988647, |
|
"learning_rate": 2.896774193548387e-05, |
|
"loss": 0.0015, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.3709090909090909, |
|
"grad_norm": 3.1881821155548096, |
|
"learning_rate": 2.8903225806451615e-05, |
|
"loss": 0.0568, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.3745454545454545, |
|
"grad_norm": 0.9627525210380554, |
|
"learning_rate": 2.8838709677419357e-05, |
|
"loss": 0.0067, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.3781818181818182, |
|
"grad_norm": 2.6984145641326904, |
|
"learning_rate": 2.8774193548387095e-05, |
|
"loss": 0.0655, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.3818181818181818, |
|
"grad_norm": 1.9964426755905151, |
|
"learning_rate": 2.8709677419354843e-05, |
|
"loss": 0.0546, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.3854545454545455, |
|
"grad_norm": 0.019922640174627304, |
|
"learning_rate": 2.864516129032258e-05, |
|
"loss": 0.001, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.3890909090909092, |
|
"grad_norm": 3.0499842166900635, |
|
"learning_rate": 2.8580645161290325e-05, |
|
"loss": 0.0726, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.3927272727272726, |
|
"grad_norm": 0.20027759671211243, |
|
"learning_rate": 2.8516129032258066e-05, |
|
"loss": 0.003, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.3963636363636365, |
|
"grad_norm": 0.549941897392273, |
|
"learning_rate": 2.845161290322581e-05, |
|
"loss": 0.1096, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 5.2639946937561035, |
|
"learning_rate": 2.838709677419355e-05, |
|
"loss": 0.0766, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.4036363636363636, |
|
"grad_norm": 0.5863090753555298, |
|
"learning_rate": 2.832258064516129e-05, |
|
"loss": 0.0076, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.4072727272727272, |
|
"grad_norm": 0.020899731665849686, |
|
"learning_rate": 2.8258064516129035e-05, |
|
"loss": 0.001, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.410909090909091, |
|
"grad_norm": 0.2003995031118393, |
|
"learning_rate": 2.8193548387096776e-05, |
|
"loss": 0.0031, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.4145454545454546, |
|
"grad_norm": 2.7366487979888916, |
|
"learning_rate": 2.812903225806452e-05, |
|
"loss": 0.052, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.4181818181818182, |
|
"grad_norm": 0.03661293536424637, |
|
"learning_rate": 2.806451612903226e-05, |
|
"loss": 0.0014, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.4218181818181819, |
|
"grad_norm": 1.4454936981201172, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.0122, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.4254545454545455, |
|
"grad_norm": 0.03340213745832443, |
|
"learning_rate": 2.7935483870967744e-05, |
|
"loss": 0.0015, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.4290909090909092, |
|
"grad_norm": 0.654367983341217, |
|
"learning_rate": 2.7870967741935482e-05, |
|
"loss": 0.0069, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.4327272727272726, |
|
"grad_norm": 0.08472099900245667, |
|
"learning_rate": 2.7806451612903227e-05, |
|
"loss": 0.0018, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.4363636363636363, |
|
"grad_norm": 3.4286415576934814, |
|
"learning_rate": 2.7741935483870968e-05, |
|
"loss": 0.0662, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 1.312857747077942, |
|
"learning_rate": 2.7677419354838713e-05, |
|
"loss": 0.014, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.4436363636363636, |
|
"grad_norm": 0.035416845232248306, |
|
"learning_rate": 2.7612903225806454e-05, |
|
"loss": 0.0013, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.4472727272727273, |
|
"grad_norm": 4.134556770324707, |
|
"learning_rate": 2.75483870967742e-05, |
|
"loss": 0.0354, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.450909090909091, |
|
"grad_norm": 0.43342649936676025, |
|
"learning_rate": 2.7483870967741936e-05, |
|
"loss": 0.0053, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.4545454545454546, |
|
"grad_norm": 0.270112007856369, |
|
"learning_rate": 2.7419354838709678e-05, |
|
"loss": 0.0029, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.4581818181818182, |
|
"grad_norm": 0.039803147315979004, |
|
"learning_rate": 2.7354838709677422e-05, |
|
"loss": 0.0017, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.461818181818182, |
|
"grad_norm": 2.7924137115478516, |
|
"learning_rate": 2.729032258064516e-05, |
|
"loss": 0.0366, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.4654545454545453, |
|
"grad_norm": 0.29749271273612976, |
|
"learning_rate": 2.7225806451612905e-05, |
|
"loss": 0.0026, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.4690909090909092, |
|
"grad_norm": 1.2988048791885376, |
|
"learning_rate": 2.7161290322580646e-05, |
|
"loss": 0.0083, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.4727272727272727, |
|
"grad_norm": 0.07334749400615692, |
|
"learning_rate": 2.709677419354839e-05, |
|
"loss": 0.0019, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.4763636363636363, |
|
"grad_norm": 0.04504287615418434, |
|
"learning_rate": 2.7032258064516132e-05, |
|
"loss": 0.0018, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.08913391828536987, |
|
"learning_rate": 2.696774193548387e-05, |
|
"loss": 0.0018, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.4836363636363636, |
|
"grad_norm": 0.20501653850078583, |
|
"learning_rate": 2.6903225806451614e-05, |
|
"loss": 0.0022, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.4872727272727273, |
|
"grad_norm": 1.9350942373275757, |
|
"learning_rate": 2.6838709677419355e-05, |
|
"loss": 0.0097, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.490909090909091, |
|
"grad_norm": 4.018691062927246, |
|
"learning_rate": 2.67741935483871e-05, |
|
"loss": 0.0256, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.4945454545454546, |
|
"grad_norm": 0.02390647679567337, |
|
"learning_rate": 2.6709677419354838e-05, |
|
"loss": 0.001, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.498181818181818, |
|
"grad_norm": 2.684476375579834, |
|
"learning_rate": 2.6645161290322586e-05, |
|
"loss": 0.0146, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.501818181818182, |
|
"grad_norm": 0.021060334518551826, |
|
"learning_rate": 2.6580645161290324e-05, |
|
"loss": 0.0009, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.5054545454545454, |
|
"grad_norm": 0.4146246910095215, |
|
"learning_rate": 2.6516129032258065e-05, |
|
"loss": 0.0025, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.509090909090909, |
|
"grad_norm": 3.564082384109497, |
|
"learning_rate": 2.645161290322581e-05, |
|
"loss": 0.0188, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.5127272727272727, |
|
"grad_norm": 0.027183400467038155, |
|
"learning_rate": 2.6387096774193548e-05, |
|
"loss": 0.0012, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.5163636363636364, |
|
"grad_norm": 0.4653225541114807, |
|
"learning_rate": 2.6322580645161292e-05, |
|
"loss": 0.0055, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.2512191832065582, |
|
"learning_rate": 2.6258064516129033e-05, |
|
"loss": 0.0015, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.5236363636363637, |
|
"grad_norm": 2.2450575828552246, |
|
"learning_rate": 2.6193548387096778e-05, |
|
"loss": 0.0209, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.5272727272727273, |
|
"grad_norm": 2.3167543411254883, |
|
"learning_rate": 2.6129032258064516e-05, |
|
"loss": 0.0544, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.5309090909090908, |
|
"grad_norm": 0.03268599510192871, |
|
"learning_rate": 2.6064516129032257e-05, |
|
"loss": 0.0011, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.5345454545454547, |
|
"grad_norm": 0.028883187100291252, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.001, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.538181818181818, |
|
"grad_norm": 0.1544177383184433, |
|
"learning_rate": 2.5935483870967743e-05, |
|
"loss": 0.0022, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.541818181818182, |
|
"grad_norm": 2.9123668670654297, |
|
"learning_rate": 2.5870967741935488e-05, |
|
"loss": 0.0187, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.5454545454545454, |
|
"grad_norm": 0.15737684071063995, |
|
"learning_rate": 2.5806451612903226e-05, |
|
"loss": 0.0021, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.549090909090909, |
|
"grad_norm": 0.023125503212213516, |
|
"learning_rate": 2.574193548387097e-05, |
|
"loss": 0.0009, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.5527272727272727, |
|
"grad_norm": 5.130437850952148, |
|
"learning_rate": 2.567741935483871e-05, |
|
"loss": 0.1732, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.5563636363636364, |
|
"grad_norm": 0.017136206850409508, |
|
"learning_rate": 2.561290322580645e-05, |
|
"loss": 0.0008, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 2.5280985832214355, |
|
"learning_rate": 2.5548387096774197e-05, |
|
"loss": 0.0143, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.5636363636363635, |
|
"grad_norm": 0.2261432558298111, |
|
"learning_rate": 2.5483870967741935e-05, |
|
"loss": 0.0029, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.5672727272727274, |
|
"grad_norm": 0.018230870366096497, |
|
"learning_rate": 2.541935483870968e-05, |
|
"loss": 0.0008, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.5709090909090908, |
|
"grad_norm": 0.12864048779010773, |
|
"learning_rate": 2.535483870967742e-05, |
|
"loss": 0.0016, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.5745454545454547, |
|
"grad_norm": 0.03463654965162277, |
|
"learning_rate": 2.5290322580645166e-05, |
|
"loss": 0.0009, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.5781818181818181, |
|
"grad_norm": 0.04040815308690071, |
|
"learning_rate": 2.5225806451612903e-05, |
|
"loss": 0.0009, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.5818181818181818, |
|
"grad_norm": 6.010333061218262, |
|
"learning_rate": 2.5161290322580645e-05, |
|
"loss": 0.0476, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.5854545454545454, |
|
"grad_norm": 2.538048505783081, |
|
"learning_rate": 2.509677419354839e-05, |
|
"loss": 0.0706, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.589090909090909, |
|
"grad_norm": 0.9795745015144348, |
|
"learning_rate": 2.5032258064516127e-05, |
|
"loss": 0.0043, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.5927272727272728, |
|
"grad_norm": 0.07065194100141525, |
|
"learning_rate": 2.4967741935483872e-05, |
|
"loss": 0.0011, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.5963636363636362, |
|
"grad_norm": 2.8576443195343018, |
|
"learning_rate": 2.4903225806451613e-05, |
|
"loss": 0.0471, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 2.197402238845825, |
|
"learning_rate": 2.4838709677419354e-05, |
|
"loss": 0.0809, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.6036363636363635, |
|
"grad_norm": 0.3648858368396759, |
|
"learning_rate": 2.47741935483871e-05, |
|
"loss": 0.0038, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.6072727272727274, |
|
"grad_norm": 0.1489875763654709, |
|
"learning_rate": 2.470967741935484e-05, |
|
"loss": 0.0017, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.6109090909090908, |
|
"grad_norm": 0.07092121988534927, |
|
"learning_rate": 2.464516129032258e-05, |
|
"loss": 0.0014, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.6145454545454545, |
|
"grad_norm": 2.7964096069335938, |
|
"learning_rate": 2.4580645161290326e-05, |
|
"loss": 0.0509, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.6181818181818182, |
|
"grad_norm": 1.7272242307662964, |
|
"learning_rate": 2.4516129032258064e-05, |
|
"loss": 0.0067, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.6218181818181818, |
|
"grad_norm": 0.06453117728233337, |
|
"learning_rate": 2.4451612903225805e-05, |
|
"loss": 0.0016, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.6254545454545455, |
|
"grad_norm": 3.195802927017212, |
|
"learning_rate": 2.438709677419355e-05, |
|
"loss": 0.0547, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.6290909090909091, |
|
"grad_norm": 0.01947391778230667, |
|
"learning_rate": 2.432258064516129e-05, |
|
"loss": 0.0008, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.6327272727272728, |
|
"grad_norm": 0.5001751780509949, |
|
"learning_rate": 2.4258064516129032e-05, |
|
"loss": 0.004, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.6363636363636362, |
|
"grad_norm": 0.4999580383300781, |
|
"learning_rate": 2.4193548387096777e-05, |
|
"loss": 0.0057, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.6400000000000001, |
|
"grad_norm": 1.5922938585281372, |
|
"learning_rate": 2.4129032258064518e-05, |
|
"loss": 0.0116, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.6436363636363636, |
|
"grad_norm": 2.461899518966675, |
|
"learning_rate": 2.406451612903226e-05, |
|
"loss": 0.018, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.6472727272727272, |
|
"grad_norm": 0.07266916334629059, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.0014, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.6509090909090909, |
|
"grad_norm": 1.8282543420791626, |
|
"learning_rate": 2.3935483870967742e-05, |
|
"loss": 0.0841, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.6545454545454545, |
|
"grad_norm": 0.06280002743005753, |
|
"learning_rate": 2.3870967741935486e-05, |
|
"loss": 0.0013, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.6581818181818182, |
|
"grad_norm": 0.06250961124897003, |
|
"learning_rate": 2.3806451612903228e-05, |
|
"loss": 0.0016, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.6618181818181819, |
|
"grad_norm": 0.35135146975517273, |
|
"learning_rate": 2.374193548387097e-05, |
|
"loss": 0.0253, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.6654545454545455, |
|
"grad_norm": 0.0423726923763752, |
|
"learning_rate": 2.367741935483871e-05, |
|
"loss": 0.0009, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.669090909090909, |
|
"grad_norm": 1.339455246925354, |
|
"learning_rate": 2.361290322580645e-05, |
|
"loss": 0.0794, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.6727272727272728, |
|
"grad_norm": 0.1556975394487381, |
|
"learning_rate": 2.3548387096774193e-05, |
|
"loss": 0.0018, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.6763636363636363, |
|
"grad_norm": 1.5904016494750977, |
|
"learning_rate": 2.3483870967741937e-05, |
|
"loss": 0.0036, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.6800000000000002, |
|
"grad_norm": 0.6247179508209229, |
|
"learning_rate": 2.341935483870968e-05, |
|
"loss": 0.0037, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.6836363636363636, |
|
"grad_norm": 1.879459023475647, |
|
"learning_rate": 2.335483870967742e-05, |
|
"loss": 0.0099, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.6872727272727273, |
|
"grad_norm": 0.07403961569070816, |
|
"learning_rate": 2.3290322580645164e-05, |
|
"loss": 0.0016, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.690909090909091, |
|
"grad_norm": 0.05183988809585571, |
|
"learning_rate": 2.3225806451612906e-05, |
|
"loss": 0.0012, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.6945454545454546, |
|
"grad_norm": 5.193166255950928, |
|
"learning_rate": 2.3161290322580644e-05, |
|
"loss": 0.0709, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.6981818181818182, |
|
"grad_norm": 1.2823094129562378, |
|
"learning_rate": 2.3096774193548388e-05, |
|
"loss": 0.0195, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.7018181818181817, |
|
"grad_norm": 2.308457851409912, |
|
"learning_rate": 2.303225806451613e-05, |
|
"loss": 0.0082, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.7054545454545456, |
|
"grad_norm": 0.39891281723976135, |
|
"learning_rate": 2.296774193548387e-05, |
|
"loss": 0.0036, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.709090909090909, |
|
"grad_norm": 0.03392359986901283, |
|
"learning_rate": 2.2903225806451615e-05, |
|
"loss": 0.0012, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.7127272727272729, |
|
"grad_norm": 0.01698954403400421, |
|
"learning_rate": 2.2838709677419357e-05, |
|
"loss": 0.0007, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.7163636363636363, |
|
"grad_norm": 0.059636473655700684, |
|
"learning_rate": 2.2774193548387098e-05, |
|
"loss": 0.0016, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.07056300342082977, |
|
"learning_rate": 2.2709677419354842e-05, |
|
"loss": 0.0017, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.7236363636363636, |
|
"grad_norm": 0.1932046115398407, |
|
"learning_rate": 2.264516129032258e-05, |
|
"loss": 0.0029, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.7272727272727273, |
|
"grad_norm": 2.354381561279297, |
|
"learning_rate": 2.258064516129032e-05, |
|
"loss": 0.0141, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.730909090909091, |
|
"grad_norm": 0.17981848120689392, |
|
"learning_rate": 2.2516129032258066e-05, |
|
"loss": 0.002, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.7345454545454544, |
|
"grad_norm": 0.03439001739025116, |
|
"learning_rate": 2.2451612903225807e-05, |
|
"loss": 0.0008, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.7381818181818183, |
|
"grad_norm": 1.2802492380142212, |
|
"learning_rate": 2.238709677419355e-05, |
|
"loss": 0.0143, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.7418181818181817, |
|
"grad_norm": 0.36729562282562256, |
|
"learning_rate": 2.2322580645161293e-05, |
|
"loss": 0.0085, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.7454545454545456, |
|
"grad_norm": 0.03446509316563606, |
|
"learning_rate": 2.2258064516129034e-05, |
|
"loss": 0.0009, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.749090909090909, |
|
"grad_norm": 0.05642567202448845, |
|
"learning_rate": 2.2193548387096776e-05, |
|
"loss": 0.0014, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.7527272727272727, |
|
"grad_norm": 0.12370045483112335, |
|
"learning_rate": 2.2129032258064517e-05, |
|
"loss": 0.0016, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.7563636363636363, |
|
"grad_norm": 0.038627080619335175, |
|
"learning_rate": 2.2064516129032258e-05, |
|
"loss": 0.0011, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.037693917751312256, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.0007, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.7636363636363637, |
|
"grad_norm": 1.4172790050506592, |
|
"learning_rate": 2.1935483870967744e-05, |
|
"loss": 0.0865, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.767272727272727, |
|
"grad_norm": 2.643702268600464, |
|
"learning_rate": 2.1870967741935485e-05, |
|
"loss": 0.0205, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.770909090909091, |
|
"grad_norm": 3.630894899368286, |
|
"learning_rate": 2.1806451612903227e-05, |
|
"loss": 0.0227, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.7745454545454544, |
|
"grad_norm": 0.11117129772901535, |
|
"learning_rate": 2.1741935483870968e-05, |
|
"loss": 0.0014, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.7781818181818183, |
|
"grad_norm": 0.041525308042764664, |
|
"learning_rate": 2.167741935483871e-05, |
|
"loss": 0.001, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.7818181818181817, |
|
"grad_norm": 0.8061334490776062, |
|
"learning_rate": 2.1612903225806454e-05, |
|
"loss": 0.1222, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.7854545454545454, |
|
"grad_norm": 0.03137822821736336, |
|
"learning_rate": 2.1548387096774195e-05, |
|
"loss": 0.0008, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.789090909090909, |
|
"grad_norm": 0.44713956117630005, |
|
"learning_rate": 2.1483870967741936e-05, |
|
"loss": 0.0023, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.7927272727272727, |
|
"grad_norm": 0.43540823459625244, |
|
"learning_rate": 2.141935483870968e-05, |
|
"loss": 0.0035, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.7963636363636364, |
|
"grad_norm": 0.1677086055278778, |
|
"learning_rate": 2.1354838709677422e-05, |
|
"loss": 0.002, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.30730533599853516, |
|
"learning_rate": 2.129032258064516e-05, |
|
"loss": 0.0044, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.8036363636363637, |
|
"grad_norm": 0.14470332860946655, |
|
"learning_rate": 2.1225806451612904e-05, |
|
"loss": 0.0018, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.8072727272727271, |
|
"grad_norm": 0.05039800703525543, |
|
"learning_rate": 2.1161290322580646e-05, |
|
"loss": 0.0011, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.810909090909091, |
|
"grad_norm": 0.15073451399803162, |
|
"learning_rate": 2.1096774193548387e-05, |
|
"loss": 0.0018, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.8145454545454545, |
|
"grad_norm": 2.081968307495117, |
|
"learning_rate": 2.103225806451613e-05, |
|
"loss": 0.0765, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"grad_norm": 0.030230529606342316, |
|
"learning_rate": 2.0967741935483873e-05, |
|
"loss": 0.001, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.8218181818181818, |
|
"grad_norm": 0.2655714750289917, |
|
"learning_rate": 2.0903225806451614e-05, |
|
"loss": 0.0021, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.8254545454545454, |
|
"grad_norm": 0.15943261981010437, |
|
"learning_rate": 2.0838709677419355e-05, |
|
"loss": 0.0015, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.829090909090909, |
|
"grad_norm": 4.7471489906311035, |
|
"learning_rate": 2.0774193548387097e-05, |
|
"loss": 0.1086, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.8327272727272728, |
|
"grad_norm": 0.1162559986114502, |
|
"learning_rate": 2.0709677419354838e-05, |
|
"loss": 0.0016, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.8363636363636364, |
|
"grad_norm": 0.0627504363656044, |
|
"learning_rate": 2.0645161290322582e-05, |
|
"loss": 0.0013, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.8399999999999999, |
|
"grad_norm": 0.5027517080307007, |
|
"learning_rate": 2.0580645161290324e-05, |
|
"loss": 0.0035, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.8436363636363637, |
|
"grad_norm": 0.02140502817928791, |
|
"learning_rate": 2.0516129032258065e-05, |
|
"loss": 0.0008, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.8472727272727272, |
|
"grad_norm": 0.07203751057386398, |
|
"learning_rate": 2.045161290322581e-05, |
|
"loss": 0.0018, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.850909090909091, |
|
"grad_norm": 1.8638368844985962, |
|
"learning_rate": 2.0387096774193547e-05, |
|
"loss": 0.0594, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.8545454545454545, |
|
"grad_norm": 0.12549816071987152, |
|
"learning_rate": 2.0322580645161292e-05, |
|
"loss": 0.0019, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.8581818181818182, |
|
"grad_norm": 0.5917963981628418, |
|
"learning_rate": 2.0258064516129033e-05, |
|
"loss": 0.0044, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.8618181818181818, |
|
"grad_norm": 0.013607682660222054, |
|
"learning_rate": 2.0193548387096775e-05, |
|
"loss": 0.0007, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.8654545454545455, |
|
"grad_norm": 0.01925772987306118, |
|
"learning_rate": 2.0129032258064516e-05, |
|
"loss": 0.0007, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.8690909090909091, |
|
"grad_norm": 0.023444948717951775, |
|
"learning_rate": 2.006451612903226e-05, |
|
"loss": 0.0008, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.8727272727272726, |
|
"grad_norm": 0.19968028366565704, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0018, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.8763636363636365, |
|
"grad_norm": 1.308447003364563, |
|
"learning_rate": 1.9935483870967743e-05, |
|
"loss": 0.0883, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.20825034379959106, |
|
"learning_rate": 1.9870967741935484e-05, |
|
"loss": 0.0022, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.8836363636363638, |
|
"grad_norm": 0.06526435911655426, |
|
"learning_rate": 1.9806451612903225e-05, |
|
"loss": 0.0014, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.8872727272727272, |
|
"grad_norm": 0.1210133358836174, |
|
"learning_rate": 1.974193548387097e-05, |
|
"loss": 0.0009, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.8909090909090909, |
|
"grad_norm": 1.0739092826843262, |
|
"learning_rate": 1.967741935483871e-05, |
|
"loss": 0.1127, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.8945454545454545, |
|
"grad_norm": 0.018074801191687584, |
|
"learning_rate": 1.9612903225806452e-05, |
|
"loss": 0.0008, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.8981818181818182, |
|
"grad_norm": 2.0916597843170166, |
|
"learning_rate": 1.9548387096774197e-05, |
|
"loss": 0.0115, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.9018181818181819, |
|
"grad_norm": 0.015548643656075, |
|
"learning_rate": 1.9483870967741935e-05, |
|
"loss": 0.0006, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.9054545454545453, |
|
"grad_norm": 0.10145322978496552, |
|
"learning_rate": 1.9419354838709676e-05, |
|
"loss": 0.002, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.9090909090909092, |
|
"grad_norm": 0.24982163310050964, |
|
"learning_rate": 1.935483870967742e-05, |
|
"loss": 0.0024, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.9127272727272726, |
|
"grad_norm": 0.14598214626312256, |
|
"learning_rate": 1.9290322580645162e-05, |
|
"loss": 0.0017, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.9163636363636365, |
|
"grad_norm": 4.5397629737854, |
|
"learning_rate": 1.9225806451612903e-05, |
|
"loss": 0.1227, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.5093303322792053, |
|
"learning_rate": 1.9161290322580648e-05, |
|
"loss": 0.0323, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.9236363636363636, |
|
"grad_norm": 0.14823508262634277, |
|
"learning_rate": 1.909677419354839e-05, |
|
"loss": 0.0017, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.9272727272727272, |
|
"grad_norm": 0.6760469675064087, |
|
"learning_rate": 1.9032258064516127e-05, |
|
"loss": 0.0054, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.930909090909091, |
|
"grad_norm": 0.05310118570923805, |
|
"learning_rate": 1.896774193548387e-05, |
|
"loss": 0.0012, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.9345454545454546, |
|
"grad_norm": 0.2781686782836914, |
|
"learning_rate": 1.8903225806451613e-05, |
|
"loss": 0.0026, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.9381818181818182, |
|
"grad_norm": 0.0464974045753479, |
|
"learning_rate": 1.8838709677419354e-05, |
|
"loss": 0.0015, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.9418181818181819, |
|
"grad_norm": 0.5551739931106567, |
|
"learning_rate": 1.87741935483871e-05, |
|
"loss": 0.0065, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.9454545454545453, |
|
"grad_norm": 0.08245756477117538, |
|
"learning_rate": 1.870967741935484e-05, |
|
"loss": 0.0015, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.9490909090909092, |
|
"grad_norm": 1.5298570394515991, |
|
"learning_rate": 1.864516129032258e-05, |
|
"loss": 0.0814, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.9527272727272726, |
|
"grad_norm": 0.028485940769314766, |
|
"learning_rate": 1.8580645161290326e-05, |
|
"loss": 0.0009, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.9563636363636365, |
|
"grad_norm": 0.12668201327323914, |
|
"learning_rate": 1.8516129032258064e-05, |
|
"loss": 0.0015, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.11611904203891754, |
|
"learning_rate": 1.845161290322581e-05, |
|
"loss": 0.0023, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.9636363636363636, |
|
"grad_norm": 1.0167148113250732, |
|
"learning_rate": 1.838709677419355e-05, |
|
"loss": 0.0055, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.9672727272727273, |
|
"grad_norm": 4.344989776611328, |
|
"learning_rate": 1.832258064516129e-05, |
|
"loss": 0.0237, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.970909090909091, |
|
"grad_norm": 1.655159831047058, |
|
"learning_rate": 1.8258064516129032e-05, |
|
"loss": 0.01, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.9745454545454546, |
|
"grad_norm": 0.03663047030568123, |
|
"learning_rate": 1.8193548387096777e-05, |
|
"loss": 0.0012, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.978181818181818, |
|
"grad_norm": 0.4332762062549591, |
|
"learning_rate": 1.8129032258064518e-05, |
|
"loss": 0.0039, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.981818181818182, |
|
"grad_norm": 3.9883310794830322, |
|
"learning_rate": 1.806451612903226e-05, |
|
"loss": 0.0309, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.9854545454545454, |
|
"grad_norm": 2.0200157165527344, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.0127, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.9890909090909092, |
|
"grad_norm": 1.3924773931503296, |
|
"learning_rate": 1.7935483870967742e-05, |
|
"loss": 0.0069, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.9927272727272727, |
|
"grad_norm": 0.2380281239748001, |
|
"learning_rate": 1.7870967741935486e-05, |
|
"loss": 0.0196, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.9963636363636363, |
|
"grad_norm": 0.9676334261894226, |
|
"learning_rate": 1.7806451612903228e-05, |
|
"loss": 0.1096, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.012336778454482555, |
|
"learning_rate": 1.774193548387097e-05, |
|
"loss": 0.0006, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9918032786885246, |
|
"eval_loss": 0.03281828388571739, |
|
"eval_runtime": 12.743, |
|
"eval_samples_per_second": 344.661, |
|
"eval_steps_per_second": 5.415, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.0036363636363634, |
|
"grad_norm": 0.019314678385853767, |
|
"learning_rate": 1.7677419354838713e-05, |
|
"loss": 0.0008, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 2.0072727272727273, |
|
"grad_norm": 0.11856160312891006, |
|
"learning_rate": 1.761290322580645e-05, |
|
"loss": 0.0016, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 2.0109090909090908, |
|
"grad_norm": 0.019284116104245186, |
|
"learning_rate": 1.7548387096774193e-05, |
|
"loss": 0.0008, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 2.0145454545454546, |
|
"grad_norm": 0.701896607875824, |
|
"learning_rate": 1.7483870967741937e-05, |
|
"loss": 0.0056, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 2.018181818181818, |
|
"grad_norm": 0.1820172518491745, |
|
"learning_rate": 1.741935483870968e-05, |
|
"loss": 0.0016, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 2.021818181818182, |
|
"grad_norm": 0.014085445553064346, |
|
"learning_rate": 1.735483870967742e-05, |
|
"loss": 0.0007, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 2.0254545454545454, |
|
"grad_norm": 0.01478899922221899, |
|
"learning_rate": 1.7290322580645164e-05, |
|
"loss": 0.0007, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 2.0290909090909093, |
|
"grad_norm": 0.01645870879292488, |
|
"learning_rate": 1.7225806451612906e-05, |
|
"loss": 0.0007, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 2.0327272727272727, |
|
"grad_norm": 0.09879927337169647, |
|
"learning_rate": 1.7161290322580643e-05, |
|
"loss": 0.0013, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 2.036363636363636, |
|
"grad_norm": 0.016561178490519524, |
|
"learning_rate": 1.7096774193548388e-05, |
|
"loss": 0.0008, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.030180329456925392, |
|
"learning_rate": 1.703225806451613e-05, |
|
"loss": 0.0011, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 2.0436363636363635, |
|
"grad_norm": 0.023942433297634125, |
|
"learning_rate": 1.696774193548387e-05, |
|
"loss": 0.0009, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 2.0472727272727274, |
|
"grad_norm": 0.5132169127464294, |
|
"learning_rate": 1.6903225806451615e-05, |
|
"loss": 0.0216, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 2.050909090909091, |
|
"grad_norm": 0.01695244014263153, |
|
"learning_rate": 1.6838709677419356e-05, |
|
"loss": 0.0008, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 2.0545454545454547, |
|
"grad_norm": 0.019326528534293175, |
|
"learning_rate": 1.6774193548387098e-05, |
|
"loss": 0.0008, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 2.058181818181818, |
|
"grad_norm": 0.018215378746390343, |
|
"learning_rate": 1.670967741935484e-05, |
|
"loss": 0.0007, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 2.061818181818182, |
|
"grad_norm": 0.02021806314587593, |
|
"learning_rate": 1.664516129032258e-05, |
|
"loss": 0.0008, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 2.0654545454545454, |
|
"grad_norm": 0.030195925384759903, |
|
"learning_rate": 1.658064516129032e-05, |
|
"loss": 0.0009, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 2.0690909090909093, |
|
"grad_norm": 0.2582416236400604, |
|
"learning_rate": 1.6516129032258066e-05, |
|
"loss": 0.0014, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 2.0727272727272728, |
|
"grad_norm": 0.052597131580114365, |
|
"learning_rate": 1.6451612903225807e-05, |
|
"loss": 0.0013, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.076363636363636, |
|
"grad_norm": 4.027952194213867, |
|
"learning_rate": 1.638709677419355e-05, |
|
"loss": 0.0367, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 3.2471768856048584, |
|
"learning_rate": 1.6322580645161293e-05, |
|
"loss": 0.0131, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 2.0836363636363635, |
|
"grad_norm": 0.013683440163731575, |
|
"learning_rate": 1.625806451612903e-05, |
|
"loss": 0.0006, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 2.0872727272727274, |
|
"grad_norm": 0.9055846929550171, |
|
"learning_rate": 1.6193548387096776e-05, |
|
"loss": 0.0045, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 2.090909090909091, |
|
"grad_norm": 0.06875227391719818, |
|
"learning_rate": 1.6129032258064517e-05, |
|
"loss": 0.0011, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.0945454545454547, |
|
"grad_norm": 0.08661270886659622, |
|
"learning_rate": 1.6064516129032258e-05, |
|
"loss": 0.0018, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 2.098181818181818, |
|
"grad_norm": 0.3580359220504761, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.0208, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 2.101818181818182, |
|
"grad_norm": 3.5332114696502686, |
|
"learning_rate": 1.5935483870967744e-05, |
|
"loss": 0.0246, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 2.1054545454545455, |
|
"grad_norm": 0.23546698689460754, |
|
"learning_rate": 1.5870967741935485e-05, |
|
"loss": 0.0058, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 2.109090909090909, |
|
"grad_norm": 0.0922674611210823, |
|
"learning_rate": 1.5806451612903226e-05, |
|
"loss": 0.001, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.112727272727273, |
|
"grad_norm": 0.03179372474551201, |
|
"learning_rate": 1.5741935483870968e-05, |
|
"loss": 0.0009, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 2.1163636363636362, |
|
"grad_norm": 0.08128567039966583, |
|
"learning_rate": 1.567741935483871e-05, |
|
"loss": 0.0021, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 0.03141499683260918, |
|
"learning_rate": 1.5612903225806454e-05, |
|
"loss": 0.0009, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 2.1236363636363635, |
|
"grad_norm": 0.056340087205171585, |
|
"learning_rate": 1.5548387096774195e-05, |
|
"loss": 0.0012, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 2.1272727272727274, |
|
"grad_norm": 0.011782072484493256, |
|
"learning_rate": 1.5483870967741936e-05, |
|
"loss": 0.0005, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 2.130909090909091, |
|
"grad_norm": 0.017823919653892517, |
|
"learning_rate": 1.541935483870968e-05, |
|
"loss": 0.0007, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 2.1345454545454547, |
|
"grad_norm": 0.19493459165096283, |
|
"learning_rate": 1.535483870967742e-05, |
|
"loss": 0.0015, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 2.138181818181818, |
|
"grad_norm": 3.2469050884246826, |
|
"learning_rate": 1.529032258064516e-05, |
|
"loss": 0.0172, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 2.1418181818181816, |
|
"grad_norm": 0.05841919407248497, |
|
"learning_rate": 1.5225806451612903e-05, |
|
"loss": 0.0008, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 2.1454545454545455, |
|
"grad_norm": 1.5849275588989258, |
|
"learning_rate": 1.5161290322580646e-05, |
|
"loss": 0.0064, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.149090909090909, |
|
"grad_norm": 0.0897936001420021, |
|
"learning_rate": 1.5096774193548389e-05, |
|
"loss": 0.0012, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 2.152727272727273, |
|
"grad_norm": 0.030406808480620384, |
|
"learning_rate": 1.503225806451613e-05, |
|
"loss": 0.0008, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 2.1563636363636363, |
|
"grad_norm": 0.42377015948295593, |
|
"learning_rate": 1.4967741935483873e-05, |
|
"loss": 0.0014, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 0.037775181233882904, |
|
"learning_rate": 1.4903225806451612e-05, |
|
"loss": 0.0009, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 2.1636363636363636, |
|
"grad_norm": 0.01757960021495819, |
|
"learning_rate": 1.4838709677419355e-05, |
|
"loss": 0.0007, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 2.1672727272727275, |
|
"grad_norm": 0.13892705738544464, |
|
"learning_rate": 1.4774193548387096e-05, |
|
"loss": 0.0013, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 2.170909090909091, |
|
"grad_norm": 0.24957135319709778, |
|
"learning_rate": 1.470967741935484e-05, |
|
"loss": 0.0021, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 2.174545454545455, |
|
"grad_norm": 0.042269039899110794, |
|
"learning_rate": 1.4645161290322582e-05, |
|
"loss": 0.0007, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 2.178181818181818, |
|
"grad_norm": 0.027613814920186996, |
|
"learning_rate": 1.4580645161290324e-05, |
|
"loss": 0.0006, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 2.1818181818181817, |
|
"grad_norm": 0.09081171452999115, |
|
"learning_rate": 1.4516129032258066e-05, |
|
"loss": 0.0007, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.1854545454545455, |
|
"grad_norm": 0.014807288534939289, |
|
"learning_rate": 1.4451612903225808e-05, |
|
"loss": 0.0006, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 2.189090909090909, |
|
"grad_norm": 0.016062721610069275, |
|
"learning_rate": 1.4387096774193547e-05, |
|
"loss": 0.0006, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 2.192727272727273, |
|
"grad_norm": 0.011297466233372688, |
|
"learning_rate": 1.432258064516129e-05, |
|
"loss": 0.0005, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 2.1963636363636363, |
|
"grad_norm": 0.018607553094625473, |
|
"learning_rate": 1.4258064516129033e-05, |
|
"loss": 0.0006, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 1.5661729574203491, |
|
"learning_rate": 1.4193548387096774e-05, |
|
"loss": 0.0571, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 2.2036363636363636, |
|
"grad_norm": 0.018998922780156136, |
|
"learning_rate": 1.4129032258064517e-05, |
|
"loss": 0.0007, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 2.207272727272727, |
|
"grad_norm": 0.00895577110350132, |
|
"learning_rate": 1.406451612903226e-05, |
|
"loss": 0.0004, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 2.210909090909091, |
|
"grad_norm": 0.023930538445711136, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 0.0006, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 2.2145454545454544, |
|
"grad_norm": 0.31831488013267517, |
|
"learning_rate": 1.3935483870967741e-05, |
|
"loss": 0.002, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 2.2181818181818183, |
|
"grad_norm": 0.46921107172966003, |
|
"learning_rate": 1.3870967741935484e-05, |
|
"loss": 0.0026, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.2218181818181817, |
|
"grad_norm": 1.8380354642868042, |
|
"learning_rate": 1.3806451612903227e-05, |
|
"loss": 0.0341, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 2.2254545454545456, |
|
"grad_norm": 0.016989752650260925, |
|
"learning_rate": 1.3741935483870968e-05, |
|
"loss": 0.0006, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 2.229090909090909, |
|
"grad_norm": 0.04719064012169838, |
|
"learning_rate": 1.3677419354838711e-05, |
|
"loss": 0.0009, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 2.232727272727273, |
|
"grad_norm": 0.45522865653038025, |
|
"learning_rate": 1.3612903225806452e-05, |
|
"loss": 0.004, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 2.2363636363636363, |
|
"grad_norm": 4.254357814788818, |
|
"learning_rate": 1.3548387096774195e-05, |
|
"loss": 0.0321, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 0.42286837100982666, |
|
"learning_rate": 1.3483870967741935e-05, |
|
"loss": 0.0112, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 2.2436363636363637, |
|
"grad_norm": 0.011312934570014477, |
|
"learning_rate": 1.3419354838709678e-05, |
|
"loss": 0.0005, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 2.247272727272727, |
|
"grad_norm": 0.056383103132247925, |
|
"learning_rate": 1.3354838709677419e-05, |
|
"loss": 0.0007, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 2.250909090909091, |
|
"grad_norm": 0.008479413576424122, |
|
"learning_rate": 1.3290322580645162e-05, |
|
"loss": 0.0004, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 2.2545454545454544, |
|
"grad_norm": 0.2737773358821869, |
|
"learning_rate": 1.3225806451612905e-05, |
|
"loss": 0.017, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.2581818181818183, |
|
"grad_norm": 0.021578149870038033, |
|
"learning_rate": 1.3161290322580646e-05, |
|
"loss": 0.0005, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 2.2618181818181817, |
|
"grad_norm": 0.017386844381690025, |
|
"learning_rate": 1.3096774193548389e-05, |
|
"loss": 0.0005, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 2.2654545454545456, |
|
"grad_norm": 0.010488376021385193, |
|
"learning_rate": 1.3032258064516129e-05, |
|
"loss": 0.0005, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 2.269090909090909, |
|
"grad_norm": 0.2101997584104538, |
|
"learning_rate": 1.2967741935483872e-05, |
|
"loss": 0.0067, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 2.2727272727272725, |
|
"grad_norm": 0.01739770732820034, |
|
"learning_rate": 1.2903225806451613e-05, |
|
"loss": 0.0006, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.2763636363636364, |
|
"grad_norm": 4.312204837799072, |
|
"learning_rate": 1.2838709677419356e-05, |
|
"loss": 0.0568, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 2.2800000000000002, |
|
"grad_norm": 0.03970419242978096, |
|
"learning_rate": 1.2774193548387099e-05, |
|
"loss": 0.0008, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 2.2836363636363637, |
|
"grad_norm": 0.06886734068393707, |
|
"learning_rate": 1.270967741935484e-05, |
|
"loss": 0.001, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 2.287272727272727, |
|
"grad_norm": 0.020005859434604645, |
|
"learning_rate": 1.2645161290322583e-05, |
|
"loss": 0.0006, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 2.290909090909091, |
|
"grad_norm": 0.042734235525131226, |
|
"learning_rate": 1.2580645161290322e-05, |
|
"loss": 0.0006, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.2945454545454544, |
|
"grad_norm": 0.1663779318332672, |
|
"learning_rate": 1.2516129032258064e-05, |
|
"loss": 0.0013, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 2.2981818181818183, |
|
"grad_norm": 0.8976339101791382, |
|
"learning_rate": 1.2451612903225807e-05, |
|
"loss": 0.0025, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 2.3018181818181818, |
|
"grad_norm": 0.013320104219019413, |
|
"learning_rate": 1.238709677419355e-05, |
|
"loss": 0.0005, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 2.3054545454545456, |
|
"grad_norm": 0.20691031217575073, |
|
"learning_rate": 1.232258064516129e-05, |
|
"loss": 0.001, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 2.309090909090909, |
|
"grad_norm": 0.05960312858223915, |
|
"learning_rate": 1.2258064516129032e-05, |
|
"loss": 0.0008, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 2.3127272727272725, |
|
"grad_norm": 3.190467596054077, |
|
"learning_rate": 1.2193548387096775e-05, |
|
"loss": 0.0132, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 2.3163636363636364, |
|
"grad_norm": 0.9548889994621277, |
|
"learning_rate": 1.2129032258064516e-05, |
|
"loss": 0.0046, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 0.037591926753520966, |
|
"learning_rate": 1.2064516129032259e-05, |
|
"loss": 0.0006, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 2.3236363636363637, |
|
"grad_norm": 0.025826606899499893, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.0006, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 2.327272727272727, |
|
"grad_norm": 0.13162964582443237, |
|
"learning_rate": 1.1935483870967743e-05, |
|
"loss": 0.0013, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.330909090909091, |
|
"grad_norm": 0.009305083192884922, |
|
"learning_rate": 1.1870967741935484e-05, |
|
"loss": 0.0004, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 2.3345454545454545, |
|
"grad_norm": 0.00890254881232977, |
|
"learning_rate": 1.1806451612903226e-05, |
|
"loss": 0.0004, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 2.3381818181818184, |
|
"grad_norm": 0.00882215891033411, |
|
"learning_rate": 1.1741935483870969e-05, |
|
"loss": 0.0004, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 2.341818181818182, |
|
"grad_norm": 0.11077472567558289, |
|
"learning_rate": 1.167741935483871e-05, |
|
"loss": 0.0008, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 2.3454545454545457, |
|
"grad_norm": 0.016244694590568542, |
|
"learning_rate": 1.1612903225806453e-05, |
|
"loss": 0.0005, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 2.349090909090909, |
|
"grad_norm": 0.038961056619882584, |
|
"learning_rate": 1.1548387096774194e-05, |
|
"loss": 0.0007, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 2.3527272727272726, |
|
"grad_norm": 0.008070679381489754, |
|
"learning_rate": 1.1483870967741935e-05, |
|
"loss": 0.0004, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 2.3563636363636364, |
|
"grad_norm": 0.03419802337884903, |
|
"learning_rate": 1.1419354838709678e-05, |
|
"loss": 0.0007, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 0.1610228568315506, |
|
"learning_rate": 1.1354838709677421e-05, |
|
"loss": 0.0018, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 2.3636363636363638, |
|
"grad_norm": 0.21933913230895996, |
|
"learning_rate": 1.129032258064516e-05, |
|
"loss": 0.0012, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.367272727272727, |
|
"grad_norm": 0.024096714332699776, |
|
"learning_rate": 1.1225806451612904e-05, |
|
"loss": 0.0005, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 2.370909090909091, |
|
"grad_norm": 0.013976830057799816, |
|
"learning_rate": 1.1161290322580647e-05, |
|
"loss": 0.0005, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 2.3745454545454545, |
|
"grad_norm": 0.009037399664521217, |
|
"learning_rate": 1.1096774193548388e-05, |
|
"loss": 0.0004, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 2.378181818181818, |
|
"grad_norm": 4.028433799743652, |
|
"learning_rate": 1.1032258064516129e-05, |
|
"loss": 0.0074, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 2.381818181818182, |
|
"grad_norm": 0.9609191417694092, |
|
"learning_rate": 1.0967741935483872e-05, |
|
"loss": 0.0054, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 2.3854545454545453, |
|
"grad_norm": 1.4599242210388184, |
|
"learning_rate": 1.0903225806451613e-05, |
|
"loss": 0.0105, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 2.389090909090909, |
|
"grad_norm": 1.0803523063659668, |
|
"learning_rate": 1.0838709677419355e-05, |
|
"loss": 0.0037, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 2.3927272727272726, |
|
"grad_norm": 0.006454968359321356, |
|
"learning_rate": 1.0774193548387097e-05, |
|
"loss": 0.0003, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 2.3963636363636365, |
|
"grad_norm": 0.043990444391965866, |
|
"learning_rate": 1.070967741935484e-05, |
|
"loss": 0.0008, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.04728386178612709, |
|
"learning_rate": 1.064516129032258e-05, |
|
"loss": 0.0006, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.403636363636364, |
|
"grad_norm": 0.012434919364750385, |
|
"learning_rate": 1.0580645161290323e-05, |
|
"loss": 0.0005, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 2.4072727272727272, |
|
"grad_norm": 0.010447041131556034, |
|
"learning_rate": 1.0516129032258066e-05, |
|
"loss": 0.0003, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 2.410909090909091, |
|
"grad_norm": 0.007828759960830212, |
|
"learning_rate": 1.0451612903225807e-05, |
|
"loss": 0.0004, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 2.4145454545454546, |
|
"grad_norm": 0.00887393206357956, |
|
"learning_rate": 1.0387096774193548e-05, |
|
"loss": 0.0004, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 2.418181818181818, |
|
"grad_norm": 0.8037237524986267, |
|
"learning_rate": 1.0322580645161291e-05, |
|
"loss": 0.0983, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 2.421818181818182, |
|
"grad_norm": 0.014007828198373318, |
|
"learning_rate": 1.0258064516129032e-05, |
|
"loss": 0.0004, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 2.4254545454545453, |
|
"grad_norm": 0.010563932359218597, |
|
"learning_rate": 1.0193548387096774e-05, |
|
"loss": 0.0004, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 2.429090909090909, |
|
"grad_norm": 0.009581638500094414, |
|
"learning_rate": 1.0129032258064517e-05, |
|
"loss": 0.0004, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 2.4327272727272726, |
|
"grad_norm": 0.014039217494428158, |
|
"learning_rate": 1.0064516129032258e-05, |
|
"loss": 0.0004, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 2.4363636363636365, |
|
"grad_norm": 0.035384513437747955, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0005, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 0.011463082395493984, |
|
"learning_rate": 9.935483870967742e-06, |
|
"loss": 0.0004, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 2.443636363636364, |
|
"grad_norm": 0.008181700482964516, |
|
"learning_rate": 9.870967741935485e-06, |
|
"loss": 0.0004, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 2.4472727272727273, |
|
"grad_norm": 0.7373052835464478, |
|
"learning_rate": 9.806451612903226e-06, |
|
"loss": 0.006, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 2.450909090909091, |
|
"grad_norm": 0.018753718584775925, |
|
"learning_rate": 9.741935483870967e-06, |
|
"loss": 0.0004, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 2.4545454545454546, |
|
"grad_norm": 0.009037282317876816, |
|
"learning_rate": 9.67741935483871e-06, |
|
"loss": 0.0004, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.458181818181818, |
|
"grad_norm": 0.016004854813218117, |
|
"learning_rate": 9.612903225806452e-06, |
|
"loss": 0.0005, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 2.461818181818182, |
|
"grad_norm": 0.037706032395362854, |
|
"learning_rate": 9.548387096774195e-06, |
|
"loss": 0.0006, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 2.4654545454545453, |
|
"grad_norm": 0.05756361410021782, |
|
"learning_rate": 9.483870967741936e-06, |
|
"loss": 0.0008, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 2.4690909090909092, |
|
"grad_norm": 0.0136951869353652, |
|
"learning_rate": 9.419354838709677e-06, |
|
"loss": 0.0005, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 2.4727272727272727, |
|
"grad_norm": 0.011940555647015572, |
|
"learning_rate": 9.35483870967742e-06, |
|
"loss": 0.0006, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.4763636363636365, |
|
"grad_norm": 0.25020283460617065, |
|
"learning_rate": 9.290322580645163e-06, |
|
"loss": 0.0088, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.008918453007936478, |
|
"learning_rate": 9.225806451612904e-06, |
|
"loss": 0.0004, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 2.4836363636363634, |
|
"grad_norm": 0.0643925741314888, |
|
"learning_rate": 9.161290322580645e-06, |
|
"loss": 0.0008, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 2.4872727272727273, |
|
"grad_norm": 0.08033094555139542, |
|
"learning_rate": 9.096774193548388e-06, |
|
"loss": 0.0007, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 2.4909090909090907, |
|
"grad_norm": 0.009339767508208752, |
|
"learning_rate": 9.03225806451613e-06, |
|
"loss": 0.0004, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 2.4945454545454546, |
|
"grad_norm": 0.012149178422987461, |
|
"learning_rate": 8.967741935483871e-06, |
|
"loss": 0.0005, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 2.498181818181818, |
|
"grad_norm": 0.028004921972751617, |
|
"learning_rate": 8.903225806451614e-06, |
|
"loss": 0.0004, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 2.501818181818182, |
|
"grad_norm": 0.013502350077033043, |
|
"learning_rate": 8.838709677419357e-06, |
|
"loss": 0.0004, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 2.5054545454545454, |
|
"grad_norm": 0.008973742835223675, |
|
"learning_rate": 8.774193548387096e-06, |
|
"loss": 0.0004, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 2.509090909090909, |
|
"grad_norm": 0.017967596650123596, |
|
"learning_rate": 8.70967741935484e-06, |
|
"loss": 0.0005, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.5127272727272727, |
|
"grad_norm": 0.009786793030798435, |
|
"learning_rate": 8.645161290322582e-06, |
|
"loss": 0.0004, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 2.5163636363636366, |
|
"grad_norm": 0.03004172444343567, |
|
"learning_rate": 8.580645161290322e-06, |
|
"loss": 0.0007, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 0.014010576531291008, |
|
"learning_rate": 8.516129032258065e-06, |
|
"loss": 0.0005, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 2.5236363636363635, |
|
"grad_norm": 0.10186956822872162, |
|
"learning_rate": 8.451612903225808e-06, |
|
"loss": 0.0007, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 2.5272727272727273, |
|
"grad_norm": 0.007655604742467403, |
|
"learning_rate": 8.387096774193549e-06, |
|
"loss": 0.0003, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 2.5309090909090908, |
|
"grad_norm": 0.026972953230142593, |
|
"learning_rate": 8.32258064516129e-06, |
|
"loss": 0.0006, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 2.5345454545454547, |
|
"grad_norm": 0.00847043376415968, |
|
"learning_rate": 8.258064516129033e-06, |
|
"loss": 0.0004, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 2.538181818181818, |
|
"grad_norm": 3.0761332511901855, |
|
"learning_rate": 8.193548387096774e-06, |
|
"loss": 0.045, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 2.541818181818182, |
|
"grad_norm": 0.010722169652581215, |
|
"learning_rate": 8.129032258064515e-06, |
|
"loss": 0.0004, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 2.5454545454545454, |
|
"grad_norm": 0.024674193933606148, |
|
"learning_rate": 8.064516129032258e-06, |
|
"loss": 0.0004, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.549090909090909, |
|
"grad_norm": 0.010132328607141972, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.0004, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 2.5527272727272727, |
|
"grad_norm": 0.007771102711558342, |
|
"learning_rate": 7.935483870967743e-06, |
|
"loss": 0.0003, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 2.5563636363636366, |
|
"grad_norm": 0.009345349855720997, |
|
"learning_rate": 7.870967741935484e-06, |
|
"loss": 0.0004, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.061426129192113876, |
|
"learning_rate": 7.806451612903227e-06, |
|
"loss": 0.0012, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 2.5636363636363635, |
|
"grad_norm": 0.006356612779200077, |
|
"learning_rate": 7.741935483870968e-06, |
|
"loss": 0.0003, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 2.5672727272727274, |
|
"grad_norm": 0.5145014524459839, |
|
"learning_rate": 7.67741935483871e-06, |
|
"loss": 0.0159, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 2.570909090909091, |
|
"grad_norm": 0.01627645082771778, |
|
"learning_rate": 7.612903225806451e-06, |
|
"loss": 0.0005, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 2.5745454545454547, |
|
"grad_norm": 0.009603966027498245, |
|
"learning_rate": 7.548387096774194e-06, |
|
"loss": 0.0005, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 2.578181818181818, |
|
"grad_norm": 0.008407434448599815, |
|
"learning_rate": 7.483870967741936e-06, |
|
"loss": 0.0004, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 2.581818181818182, |
|
"grad_norm": 0.007048910949379206, |
|
"learning_rate": 7.419354838709678e-06, |
|
"loss": 0.0003, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.5854545454545454, |
|
"grad_norm": 0.007168797310441732, |
|
"learning_rate": 7.35483870967742e-06, |
|
"loss": 0.0003, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 2.589090909090909, |
|
"grad_norm": 0.017820533365011215, |
|
"learning_rate": 7.290322580645162e-06, |
|
"loss": 0.0006, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 2.5927272727272728, |
|
"grad_norm": 1.8030993938446045, |
|
"learning_rate": 7.225806451612904e-06, |
|
"loss": 0.1001, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 2.596363636363636, |
|
"grad_norm": 0.006081653293222189, |
|
"learning_rate": 7.161290322580645e-06, |
|
"loss": 0.0003, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 0.013014406897127628, |
|
"learning_rate": 7.096774193548387e-06, |
|
"loss": 0.0005, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 2.6036363636363635, |
|
"grad_norm": 0.5529889464378357, |
|
"learning_rate": 7.03225806451613e-06, |
|
"loss": 0.0032, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 2.6072727272727274, |
|
"grad_norm": 0.010706817731261253, |
|
"learning_rate": 6.9677419354838705e-06, |
|
"loss": 0.0004, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 2.610909090909091, |
|
"grad_norm": 0.009283789433538914, |
|
"learning_rate": 6.9032258064516135e-06, |
|
"loss": 0.0004, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 2.6145454545454543, |
|
"grad_norm": 0.006831625942140818, |
|
"learning_rate": 6.8387096774193555e-06, |
|
"loss": 0.0003, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 2.618181818181818, |
|
"grad_norm": 0.010544957593083382, |
|
"learning_rate": 6.774193548387098e-06, |
|
"loss": 0.0004, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.621818181818182, |
|
"grad_norm": 0.573939323425293, |
|
"learning_rate": 6.709677419354839e-06, |
|
"loss": 0.1136, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 2.6254545454545455, |
|
"grad_norm": 0.007392039522528648, |
|
"learning_rate": 6.645161290322581e-06, |
|
"loss": 0.0003, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 2.629090909090909, |
|
"grad_norm": 0.03405938670039177, |
|
"learning_rate": 6.580645161290323e-06, |
|
"loss": 0.0007, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 2.632727272727273, |
|
"grad_norm": 0.018447572365403175, |
|
"learning_rate": 6.516129032258064e-06, |
|
"loss": 0.0005, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 2.6363636363636362, |
|
"grad_norm": 0.00638270378112793, |
|
"learning_rate": 6.451612903225806e-06, |
|
"loss": 0.0003, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 0.017991170287132263, |
|
"learning_rate": 6.387096774193549e-06, |
|
"loss": 0.0004, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 2.6436363636363636, |
|
"grad_norm": 0.00989875290542841, |
|
"learning_rate": 6.322580645161291e-06, |
|
"loss": 0.0004, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 2.6472727272727274, |
|
"grad_norm": 3.8099822998046875, |
|
"learning_rate": 6.258064516129032e-06, |
|
"loss": 0.0512, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 2.650909090909091, |
|
"grad_norm": 0.012733093462884426, |
|
"learning_rate": 6.193548387096775e-06, |
|
"loss": 0.0005, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 2.6545454545454543, |
|
"grad_norm": 0.04748505726456642, |
|
"learning_rate": 6.129032258064516e-06, |
|
"loss": 0.0007, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.658181818181818, |
|
"grad_norm": 4.789937973022461, |
|
"learning_rate": 6.064516129032258e-06, |
|
"loss": 0.048, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 2.661818181818182, |
|
"grad_norm": 0.03021158277988434, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0006, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 2.6654545454545455, |
|
"grad_norm": 0.017927074804902077, |
|
"learning_rate": 5.935483870967742e-06, |
|
"loss": 0.0005, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 2.669090909090909, |
|
"grad_norm": 0.01334038283675909, |
|
"learning_rate": 5.870967741935484e-06, |
|
"loss": 0.0005, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 2.672727272727273, |
|
"grad_norm": 5.3513970375061035, |
|
"learning_rate": 5.806451612903226e-06, |
|
"loss": 0.0182, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 2.6763636363636363, |
|
"grad_norm": 0.012456363067030907, |
|
"learning_rate": 5.741935483870968e-06, |
|
"loss": 0.0004, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 0.024449974298477173, |
|
"learning_rate": 5.677419354838711e-06, |
|
"loss": 0.0006, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 2.6836363636363636, |
|
"grad_norm": 0.015060571022331715, |
|
"learning_rate": 5.612903225806452e-06, |
|
"loss": 0.0005, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 2.6872727272727275, |
|
"grad_norm": 4.723052024841309, |
|
"learning_rate": 5.548387096774194e-06, |
|
"loss": 0.0212, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 2.690909090909091, |
|
"grad_norm": 0.020275188609957695, |
|
"learning_rate": 5.483870967741936e-06, |
|
"loss": 0.0005, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.6945454545454544, |
|
"grad_norm": 0.00891982950270176, |
|
"learning_rate": 5.419354838709677e-06, |
|
"loss": 0.0004, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 2.6981818181818182, |
|
"grad_norm": 0.006951956544071436, |
|
"learning_rate": 5.35483870967742e-06, |
|
"loss": 0.0003, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 2.7018181818181817, |
|
"grad_norm": 1.1897835731506348, |
|
"learning_rate": 5.2903225806451614e-06, |
|
"loss": 0.1006, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 2.7054545454545456, |
|
"grad_norm": 0.0193193256855011, |
|
"learning_rate": 5.2258064516129035e-06, |
|
"loss": 0.0005, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 2.709090909090909, |
|
"grad_norm": 0.006243540905416012, |
|
"learning_rate": 5.161290322580646e-06, |
|
"loss": 0.0003, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 2.712727272727273, |
|
"grad_norm": 0.011104391887784004, |
|
"learning_rate": 5.096774193548387e-06, |
|
"loss": 0.0005, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 2.7163636363636363, |
|
"grad_norm": 0.015710929408669472, |
|
"learning_rate": 5.032258064516129e-06, |
|
"loss": 0.0005, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 2.7199999999999998, |
|
"grad_norm": 0.008195963688194752, |
|
"learning_rate": 4.967741935483871e-06, |
|
"loss": 0.0004, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 2.7236363636363636, |
|
"grad_norm": 0.5103004574775696, |
|
"learning_rate": 4.903225806451613e-06, |
|
"loss": 0.0028, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 2.7272727272727275, |
|
"grad_norm": 0.009401354938745499, |
|
"learning_rate": 4.838709677419355e-06, |
|
"loss": 0.0004, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.730909090909091, |
|
"grad_norm": 0.010534017346799374, |
|
"learning_rate": 4.774193548387097e-06, |
|
"loss": 0.0004, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 2.7345454545454544, |
|
"grad_norm": 0.02028539776802063, |
|
"learning_rate": 4.7096774193548385e-06, |
|
"loss": 0.0005, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 2.7381818181818183, |
|
"grad_norm": 0.10612978786230087, |
|
"learning_rate": 4.6451612903225815e-06, |
|
"loss": 0.0013, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 2.7418181818181817, |
|
"grad_norm": 0.008999668061733246, |
|
"learning_rate": 4.580645161290323e-06, |
|
"loss": 0.0004, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 2.7454545454545456, |
|
"grad_norm": 0.020546872168779373, |
|
"learning_rate": 4.516129032258065e-06, |
|
"loss": 0.0006, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 2.749090909090909, |
|
"grad_norm": 0.04722006618976593, |
|
"learning_rate": 4.451612903225807e-06, |
|
"loss": 0.0006, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 2.752727272727273, |
|
"grad_norm": 0.6383763551712036, |
|
"learning_rate": 4.387096774193548e-06, |
|
"loss": 0.0017, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 2.7563636363636363, |
|
"grad_norm": 0.013335658237338066, |
|
"learning_rate": 4.322580645161291e-06, |
|
"loss": 0.0005, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 0.01042084489017725, |
|
"learning_rate": 4.258064516129032e-06, |
|
"loss": 0.0004, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 2.7636363636363637, |
|
"grad_norm": 0.0967690572142601, |
|
"learning_rate": 4.193548387096774e-06, |
|
"loss": 0.0011, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.767272727272727, |
|
"grad_norm": 0.02381141297519207, |
|
"learning_rate": 4.1290322580645165e-06, |
|
"loss": 0.0007, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 2.770909090909091, |
|
"grad_norm": 0.02535305730998516, |
|
"learning_rate": 4.064516129032258e-06, |
|
"loss": 0.0006, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 2.7745454545454544, |
|
"grad_norm": 0.0302995927631855, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.0007, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 2.7781818181818183, |
|
"grad_norm": 0.01775578036904335, |
|
"learning_rate": 3.935483870967742e-06, |
|
"loss": 0.0005, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 2.7818181818181817, |
|
"grad_norm": 0.011038933880627155, |
|
"learning_rate": 3.870967741935484e-06, |
|
"loss": 0.0004, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 2.785454545454545, |
|
"grad_norm": 0.010119972750544548, |
|
"learning_rate": 3.8064516129032257e-06, |
|
"loss": 0.0004, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 2.789090909090909, |
|
"grad_norm": 0.015269882045686245, |
|
"learning_rate": 3.741935483870968e-06, |
|
"loss": 0.0005, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 2.792727272727273, |
|
"grad_norm": 0.048511989414691925, |
|
"learning_rate": 3.67741935483871e-06, |
|
"loss": 0.0009, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 2.7963636363636364, |
|
"grad_norm": 0.08957032114267349, |
|
"learning_rate": 3.612903225806452e-06, |
|
"loss": 0.0009, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 0.012502241879701614, |
|
"learning_rate": 3.5483870967741936e-06, |
|
"loss": 0.0005, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.8036363636363637, |
|
"grad_norm": 0.017656538635492325, |
|
"learning_rate": 3.4838709677419353e-06, |
|
"loss": 0.0004, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 2.807272727272727, |
|
"grad_norm": 0.03845641762018204, |
|
"learning_rate": 3.4193548387096778e-06, |
|
"loss": 0.001, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 2.810909090909091, |
|
"grad_norm": 0.6619153618812561, |
|
"learning_rate": 3.3548387096774194e-06, |
|
"loss": 0.0026, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 2.8145454545454545, |
|
"grad_norm": 0.012663335539400578, |
|
"learning_rate": 3.2903225806451615e-06, |
|
"loss": 0.0005, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 2.8181818181818183, |
|
"grad_norm": 0.2195906788110733, |
|
"learning_rate": 3.225806451612903e-06, |
|
"loss": 0.0085, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.821818181818182, |
|
"grad_norm": 0.02970001846551895, |
|
"learning_rate": 3.1612903225806457e-06, |
|
"loss": 0.0009, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 2.825454545454545, |
|
"grad_norm": 0.025815211236476898, |
|
"learning_rate": 3.0967741935483874e-06, |
|
"loss": 0.0007, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 2.829090909090909, |
|
"grad_norm": 0.026814987882971764, |
|
"learning_rate": 3.032258064516129e-06, |
|
"loss": 0.0006, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 2.832727272727273, |
|
"grad_norm": 0.7879754900932312, |
|
"learning_rate": 2.967741935483871e-06, |
|
"loss": 0.1117, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 2.8363636363636364, |
|
"grad_norm": 0.0365438349545002, |
|
"learning_rate": 2.903225806451613e-06, |
|
"loss": 0.0009, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 0.1561412513256073, |
|
"learning_rate": 2.8387096774193553e-06, |
|
"loss": 0.0012, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 2.8436363636363637, |
|
"grad_norm": 0.010296767577528954, |
|
"learning_rate": 2.774193548387097e-06, |
|
"loss": 0.0004, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 2.847272727272727, |
|
"grad_norm": 0.16209469735622406, |
|
"learning_rate": 2.7096774193548386e-06, |
|
"loss": 0.0013, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 2.850909090909091, |
|
"grad_norm": 0.014227217994630337, |
|
"learning_rate": 2.6451612903225807e-06, |
|
"loss": 0.0004, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 2.8545454545454545, |
|
"grad_norm": 0.06868361681699753, |
|
"learning_rate": 2.580645161290323e-06, |
|
"loss": 0.0006, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 2.8581818181818184, |
|
"grad_norm": 0.02630774676799774, |
|
"learning_rate": 2.5161290322580645e-06, |
|
"loss": 0.0006, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 2.861818181818182, |
|
"grad_norm": 0.007779018487781286, |
|
"learning_rate": 2.4516129032258066e-06, |
|
"loss": 0.0004, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 2.8654545454545453, |
|
"grad_norm": 0.03792530298233032, |
|
"learning_rate": 2.3870967741935486e-06, |
|
"loss": 0.0006, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 2.869090909090909, |
|
"grad_norm": 0.009642829187214375, |
|
"learning_rate": 2.3225806451612907e-06, |
|
"loss": 0.0004, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 2.8727272727272726, |
|
"grad_norm": 0.014530934393405914, |
|
"learning_rate": 2.2580645161290324e-06, |
|
"loss": 0.0005, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.8763636363636365, |
|
"grad_norm": 0.09846967458724976, |
|
"learning_rate": 2.193548387096774e-06, |
|
"loss": 0.001, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 0.02130993641912937, |
|
"learning_rate": 2.129032258064516e-06, |
|
"loss": 0.0006, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 2.8836363636363638, |
|
"grad_norm": 0.6667435765266418, |
|
"learning_rate": 2.0645161290322582e-06, |
|
"loss": 0.0023, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 2.887272727272727, |
|
"grad_norm": 0.10888272523880005, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.0009, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 2.8909090909090907, |
|
"grad_norm": 0.048606500029563904, |
|
"learning_rate": 1.935483870967742e-06, |
|
"loss": 0.0005, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 2.8945454545454545, |
|
"grad_norm": 0.23399078845977783, |
|
"learning_rate": 1.870967741935484e-06, |
|
"loss": 0.0027, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 2.8981818181818184, |
|
"grad_norm": 0.01046321727335453, |
|
"learning_rate": 1.806451612903226e-06, |
|
"loss": 0.0004, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 2.901818181818182, |
|
"grad_norm": 0.015533102676272392, |
|
"learning_rate": 1.7419354838709676e-06, |
|
"loss": 0.0006, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 2.9054545454545453, |
|
"grad_norm": 0.016338596120476723, |
|
"learning_rate": 1.6774193548387097e-06, |
|
"loss": 0.0006, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 2.909090909090909, |
|
"grad_norm": 0.00850651878863573, |
|
"learning_rate": 1.6129032258064516e-06, |
|
"loss": 0.0004, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.9127272727272726, |
|
"grad_norm": 0.7118433713912964, |
|
"learning_rate": 1.5483870967741937e-06, |
|
"loss": 0.0027, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 2.9163636363636365, |
|
"grad_norm": 1.7679587602615356, |
|
"learning_rate": 1.4838709677419356e-06, |
|
"loss": 0.0496, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 0.010516179725527763, |
|
"learning_rate": 1.4193548387096776e-06, |
|
"loss": 0.0004, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 2.923636363636364, |
|
"grad_norm": 0.0124241653829813, |
|
"learning_rate": 1.3548387096774193e-06, |
|
"loss": 0.0004, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 2.9272727272727272, |
|
"grad_norm": 0.00775744765996933, |
|
"learning_rate": 1.2903225806451614e-06, |
|
"loss": 0.0003, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 2.9309090909090907, |
|
"grad_norm": 1.7472180128097534, |
|
"learning_rate": 1.2258064516129033e-06, |
|
"loss": 0.0587, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 2.9345454545454546, |
|
"grad_norm": 0.014171603135764599, |
|
"learning_rate": 1.1612903225806454e-06, |
|
"loss": 0.0005, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 2.9381818181818184, |
|
"grad_norm": 0.011718512512743473, |
|
"learning_rate": 1.096774193548387e-06, |
|
"loss": 0.0004, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 2.941818181818182, |
|
"grad_norm": 0.011687002144753933, |
|
"learning_rate": 1.0322580645161291e-06, |
|
"loss": 0.0004, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 2.9454545454545453, |
|
"grad_norm": 0.010272631421685219, |
|
"learning_rate": 9.67741935483871e-07, |
|
"loss": 0.0004, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.949090909090909, |
|
"grad_norm": 0.4066472351551056, |
|
"learning_rate": 9.03225806451613e-07, |
|
"loss": 0.0023, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 2.9527272727272726, |
|
"grad_norm": 1.5127947330474854, |
|
"learning_rate": 8.387096774193549e-07, |
|
"loss": 0.0037, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 2.9563636363636365, |
|
"grad_norm": 0.017161400988698006, |
|
"learning_rate": 7.741935483870968e-07, |
|
"loss": 0.0005, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 0.009147647768259048, |
|
"learning_rate": 7.096774193548388e-07, |
|
"loss": 0.0004, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 2.963636363636364, |
|
"grad_norm": 0.010304290801286697, |
|
"learning_rate": 6.451612903225807e-07, |
|
"loss": 0.0005, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 2.9672727272727273, |
|
"grad_norm": 0.027671974152326584, |
|
"learning_rate": 5.806451612903227e-07, |
|
"loss": 0.0008, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 2.9709090909090907, |
|
"grad_norm": 3.4109323024749756, |
|
"learning_rate": 5.161290322580646e-07, |
|
"loss": 0.0096, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 2.9745454545454546, |
|
"grad_norm": 0.1351071149110794, |
|
"learning_rate": 4.516129032258065e-07, |
|
"loss": 0.0012, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 2.978181818181818, |
|
"grad_norm": 0.01651870645582676, |
|
"learning_rate": 3.870967741935484e-07, |
|
"loss": 0.0005, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 2.981818181818182, |
|
"grad_norm": 0.012661241926252842, |
|
"learning_rate": 3.2258064516129035e-07, |
|
"loss": 0.0004, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.9854545454545454, |
|
"grad_norm": 0.007508403621613979, |
|
"learning_rate": 2.580645161290323e-07, |
|
"loss": 0.0003, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 2.9890909090909092, |
|
"grad_norm": 0.2275754064321518, |
|
"learning_rate": 1.935483870967742e-07, |
|
"loss": 0.001, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 2.9927272727272727, |
|
"grad_norm": 0.012626181356608868, |
|
"learning_rate": 1.2903225806451614e-07, |
|
"loss": 0.0005, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 2.996363636363636, |
|
"grad_norm": 0.016164276748895645, |
|
"learning_rate": 6.451612903225807e-08, |
|
"loss": 0.0006, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.007542683742940426, |
|
"learning_rate": 0.0, |
|
"loss": 0.0003, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9938524590163934, |
|
"eval_loss": 0.026551904156804085, |
|
"eval_runtime": 12.7394, |
|
"eval_samples_per_second": 344.756, |
|
"eval_steps_per_second": 5.416, |
|
"step": 825 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 825, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6981561778765824.0, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|