{
  "best_metric": 0.26534104347229004,
  "best_model_checkpoint": "miner_id_24/checkpoint-75",
  "epoch": 0.27012617735916117,
  "eval_steps": 25,
  "global_step": 95,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0028434334458859074,
      "grad_norm": 22.599037170410156,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 5.3378,
      "step": 1
    },
    {
      "epoch": 0.0028434334458859074,
      "eval_loss": 5.232264995574951,
      "eval_runtime": 3.9625,
      "eval_samples_per_second": 12.618,
      "eval_steps_per_second": 3.281,
      "step": 1
    },
    {
      "epoch": 0.005686866891771815,
      "grad_norm": 23.47909164428711,
      "learning_rate": 6.666666666666667e-05,
      "loss": 5.2597,
      "step": 2
    },
    {
      "epoch": 0.008530300337657722,
      "grad_norm": 18.171615600585938,
      "learning_rate": 0.0001,
      "loss": 4.3128,
      "step": 3
    },
    {
      "epoch": 0.01137373378354363,
      "grad_norm": 14.74112606048584,
      "learning_rate": 9.997376600647783e-05,
      "loss": 2.5084,
      "step": 4
    },
    {
      "epoch": 0.014217167229429535,
      "grad_norm": 11.828715324401855,
      "learning_rate": 9.989509461357426e-05,
      "loss": 1.0016,
      "step": 5
    },
    {
      "epoch": 0.017060600675315445,
      "grad_norm": 2.351505756378174,
      "learning_rate": 9.976407754861426e-05,
      "loss": 0.4414,
      "step": 6
    },
    {
      "epoch": 0.019904034121201352,
      "grad_norm": 5.641177177429199,
      "learning_rate": 9.958086757163489e-05,
      "loss": 0.4406,
      "step": 7
    },
    {
      "epoch": 0.02274746756708726,
      "grad_norm": 2.2351808547973633,
      "learning_rate": 9.934567829727386e-05,
      "loss": 0.3914,
      "step": 8
    },
    {
      "epoch": 0.025590901012973163,
      "grad_norm": 2.748798131942749,
      "learning_rate": 9.905878394570453e-05,
      "loss": 0.3943,
      "step": 9
    },
    {
      "epoch": 0.02843433445885907,
      "grad_norm": 2.5455727577209473,
      "learning_rate": 9.872051902290737e-05,
      "loss": 0.378,
      "step": 10
    },
    {
      "epoch": 0.03127776790474498,
      "grad_norm": 3.8824398517608643,
      "learning_rate": 9.833127793065098e-05,
      "loss": 0.4721,
      "step": 11
    },
    {
      "epoch": 0.03412120135063089,
      "grad_norm": 3.543416976928711,
      "learning_rate": 9.789151450663723e-05,
      "loss": 0.3677,
      "step": 12
    },
    {
      "epoch": 0.03696463479651679,
      "grad_norm": 1.8024444580078125,
      "learning_rate": 9.740174149534693e-05,
      "loss": 0.3316,
      "step": 13
    },
    {
      "epoch": 0.039808068242402704,
      "grad_norm": 1.9301329851150513,
      "learning_rate": 9.686252995020249e-05,
      "loss": 0.3689,
      "step": 14
    },
    {
      "epoch": 0.04265150168828861,
      "grad_norm": 2.8067333698272705,
      "learning_rate": 9.627450856774539e-05,
      "loss": 0.3827,
      "step": 15
    },
    {
      "epoch": 0.04549493513417452,
      "grad_norm": 1.4591389894485474,
      "learning_rate": 9.563836295460398e-05,
      "loss": 0.3528,
      "step": 16
    },
    {
      "epoch": 0.04833836858006042,
      "grad_norm": 1.4871190786361694,
      "learning_rate": 9.495483482810688e-05,
      "loss": 0.3235,
      "step": 17
    },
    {
      "epoch": 0.05118180202594633,
      "grad_norm": 1.626112937927246,
      "learning_rate": 9.422472115147382e-05,
      "loss": 0.3317,
      "step": 18
    },
    {
      "epoch": 0.05402523547183224,
      "grad_norm": 1.4325222969055176,
      "learning_rate": 9.3448873204592e-05,
      "loss": 0.3437,
      "step": 19
    },
    {
      "epoch": 0.05686866891771814,
      "grad_norm": 2.8382790088653564,
      "learning_rate": 9.2628195591462e-05,
      "loss": 0.3757,
      "step": 20
    },
    {
      "epoch": 0.05971210236360405,
      "grad_norm": 1.625089406967163,
      "learning_rate": 9.176364518546989e-05,
      "loss": 0.3304,
      "step": 21
    },
    {
      "epoch": 0.06255553580948996,
      "grad_norm": 1.772882342338562,
      "learning_rate": 9.08562300137157e-05,
      "loss": 0.3802,
      "step": 22
    },
    {
      "epoch": 0.06539896925537586,
      "grad_norm": 1.3592987060546875,
      "learning_rate": 8.990700808169889e-05,
      "loss": 0.3369,
      "step": 23
    },
    {
      "epoch": 0.06824240270126178,
      "grad_norm": 1.762557864189148,
      "learning_rate": 8.891708613973126e-05,
      "loss": 0.3376,
      "step": 24
    },
    {
      "epoch": 0.07108583614714768,
      "grad_norm": 1.9905650615692139,
      "learning_rate": 8.788761839251559e-05,
      "loss": 0.2961,
      "step": 25
    },
    {
      "epoch": 0.07108583614714768,
      "eval_loss": 0.3258455991744995,
      "eval_runtime": 4.0271,
      "eval_samples_per_second": 12.416,
      "eval_steps_per_second": 3.228,
      "step": 25
    },
    {
      "epoch": 0.07392926959303359,
      "grad_norm": 0.7239434719085693,
      "learning_rate": 8.681980515339464e-05,
      "loss": 0.3381,
      "step": 26
    },
    {
      "epoch": 0.07677270303891949,
      "grad_norm": 0.8661226034164429,
      "learning_rate": 8.571489144483944e-05,
      "loss": 0.3448,
      "step": 27
    },
    {
      "epoch": 0.07961613648480541,
      "grad_norm": 1.6016333103179932,
      "learning_rate": 8.457416554680877e-05,
      "loss": 0.3474,
      "step": 28
    },
    {
      "epoch": 0.08245956993069131,
      "grad_norm": 1.2299487590789795,
      "learning_rate": 8.339895749467238e-05,
      "loss": 0.3493,
      "step": 29
    },
    {
      "epoch": 0.08530300337657722,
      "grad_norm": 1.4147472381591797,
      "learning_rate": 8.219063752844926e-05,
      "loss": 0.3162,
      "step": 30
    },
    {
      "epoch": 0.08814643682246312,
      "grad_norm": 1.3127275705337524,
      "learning_rate": 8.095061449516903e-05,
      "loss": 0.3306,
      "step": 31
    },
    {
      "epoch": 0.09098987026834904,
      "grad_norm": 1.7408323287963867,
      "learning_rate": 7.968033420621935e-05,
      "loss": 0.3163,
      "step": 32
    },
    {
      "epoch": 0.09383330371423494,
      "grad_norm": 1.461795687675476,
      "learning_rate": 7.838127775159452e-05,
      "loss": 0.2727,
      "step": 33
    },
    {
      "epoch": 0.09667673716012085,
      "grad_norm": 1.4612610340118408,
      "learning_rate": 7.705495977301078e-05,
      "loss": 0.3574,
      "step": 34
    },
    {
      "epoch": 0.09952017060600675,
      "grad_norm": 1.4245153665542603,
      "learning_rate": 7.570292669790186e-05,
      "loss": 0.3115,
      "step": 35
    },
    {
      "epoch": 0.10236360405189265,
      "grad_norm": 1.1729720830917358,
      "learning_rate": 7.43267549363537e-05,
      "loss": 0.3435,
      "step": 36
    },
    {
      "epoch": 0.10520703749777857,
      "grad_norm": 0.9695225954055786,
      "learning_rate": 7.292804904308087e-05,
      "loss": 0.2869,
      "step": 37
    },
    {
      "epoch": 0.10805047094366448,
      "grad_norm": 0.7456521391868591,
      "learning_rate": 7.150843984658754e-05,
      "loss": 0.2947,
      "step": 38
    },
    {
      "epoch": 0.11089390438955038,
      "grad_norm": 0.8560568690299988,
      "learning_rate": 7.006958254769438e-05,
      "loss": 0.3231,
      "step": 39
    },
    {
      "epoch": 0.11373733783543628,
      "grad_norm": 0.9865386486053467,
      "learning_rate": 6.861315478964841e-05,
      "loss": 0.302,
      "step": 40
    },
    {
      "epoch": 0.1165807712813222,
      "grad_norm": 0.9534958004951477,
      "learning_rate": 6.714085470206609e-05,
      "loss": 0.3014,
      "step": 41
    },
    {
      "epoch": 0.1194242047272081,
      "grad_norm": 0.8210452198982239,
      "learning_rate": 6.56543989209901e-05,
      "loss": 0.2968,
      "step": 42
    },
    {
      "epoch": 0.12226763817309401,
      "grad_norm": 1.0526001453399658,
      "learning_rate": 6.415552058736854e-05,
      "loss": 0.3187,
      "step": 43
    },
    {
      "epoch": 0.1251110716189799,
      "grad_norm": 1.1468490362167358,
      "learning_rate": 6.264596732629e-05,
      "loss": 0.2978,
      "step": 44
    },
    {
      "epoch": 0.12795450506486583,
      "grad_norm": 1.0789867639541626,
      "learning_rate": 6.112749920933111e-05,
      "loss": 0.2907,
      "step": 45
    },
    {
      "epoch": 0.13079793851075172,
      "grad_norm": 1.4448879957199097,
      "learning_rate": 5.960188670239154e-05,
      "loss": 0.3332,
      "step": 46
    },
    {
      "epoch": 0.13364137195663764,
      "grad_norm": 1.1925578117370605,
      "learning_rate": 5.80709086014102e-05,
      "loss": 0.2723,
      "step": 47
    },
    {
      "epoch": 0.13648480540252356,
      "grad_norm": 1.3483730554580688,
      "learning_rate": 5.653634995836856e-05,
      "loss": 0.3051,
      "step": 48
    },
    {
      "epoch": 0.13932823884840945,
      "grad_norm": 1.0803078413009644,
      "learning_rate": 5.500000000000001e-05,
      "loss": 0.2854,
      "step": 49
    },
    {
      "epoch": 0.14217167229429536,
      "grad_norm": 1.0243525505065918,
      "learning_rate": 5.346365004163145e-05,
      "loss": 0.2797,
      "step": 50
    },
    {
      "epoch": 0.14217167229429536,
      "eval_loss": 0.27564024925231934,
      "eval_runtime": 4.0615,
      "eval_samples_per_second": 12.311,
      "eval_steps_per_second": 3.201,
      "step": 50
    },
    {
      "epoch": 0.14501510574018128,
      "grad_norm": 1.0136526823043823,
      "learning_rate": 5.192909139858981e-05,
      "loss": 0.3151,
      "step": 51
    },
    {
      "epoch": 0.14785853918606717,
      "grad_norm": 1.1708089113235474,
      "learning_rate": 5.0398113297608465e-05,
      "loss": 0.2924,
      "step": 52
    },
    {
      "epoch": 0.1507019726319531,
      "grad_norm": 0.8153492212295532,
      "learning_rate": 4.887250079066892e-05,
      "loss": 0.2776,
      "step": 53
    },
    {
      "epoch": 0.15354540607783898,
      "grad_norm": 1.1344809532165527,
      "learning_rate": 4.7354032673710005e-05,
      "loss": 0.2746,
      "step": 54
    },
    {
      "epoch": 0.1563888395237249,
      "grad_norm": 1.1401007175445557,
      "learning_rate": 4.584447941263149e-05,
      "loss": 0.309,
      "step": 55
    },
    {
      "epoch": 0.15923227296961082,
      "grad_norm": 1.3479905128479004,
      "learning_rate": 4.43456010790099e-05,
      "loss": 0.2876,
      "step": 56
    },
    {
      "epoch": 0.1620757064154967,
      "grad_norm": 1.0521594285964966,
      "learning_rate": 4.285914529793391e-05,
      "loss": 0.2562,
      "step": 57
    },
    {
      "epoch": 0.16491913986138262,
      "grad_norm": 0.9862216711044312,
      "learning_rate": 4.13868452103516e-05,
      "loss": 0.2832,
      "step": 58
    },
    {
      "epoch": 0.16776257330726851,
      "grad_norm": 1.2180746793746948,
      "learning_rate": 3.9930417452305626e-05,
      "loss": 0.3047,
      "step": 59
    },
    {
      "epoch": 0.17060600675315443,
      "grad_norm": 1.5846315622329712,
      "learning_rate": 3.8491560153412466e-05,
      "loss": 0.2768,
      "step": 60
    },
    {
      "epoch": 0.17344944019904035,
      "grad_norm": 1.3017767667770386,
      "learning_rate": 3.707195095691913e-05,
      "loss": 0.2877,
      "step": 61
    },
    {
      "epoch": 0.17629287364492624,
      "grad_norm": 1.473238468170166,
      "learning_rate": 3.567324506364632e-05,
      "loss": 0.2664,
      "step": 62
    },
    {
      "epoch": 0.17913630709081216,
      "grad_norm": 1.2154439687728882,
      "learning_rate": 3.4297073302098156e-05,
      "loss": 0.2768,
      "step": 63
    },
    {
      "epoch": 0.18197974053669808,
      "grad_norm": 1.683435082435608,
      "learning_rate": 3.2945040226989244e-05,
      "loss": 0.2896,
      "step": 64
    },
    {
      "epoch": 0.18482317398258397,
      "grad_norm": 1.3778927326202393,
      "learning_rate": 3.16187222484055e-05,
      "loss": 0.295,
      "step": 65
    },
    {
      "epoch": 0.18766660742846988,
      "grad_norm": 1.4608365297317505,
      "learning_rate": 3.0319665793780648e-05,
      "loss": 0.2611,
      "step": 66
    },
    {
      "epoch": 0.19051004087435577,
      "grad_norm": 1.2854911088943481,
      "learning_rate": 2.9049385504830985e-05,
      "loss": 0.2541,
      "step": 67
    },
    {
      "epoch": 0.1933534743202417,
      "grad_norm": 1.3991892337799072,
      "learning_rate": 2.7809362471550748e-05,
      "loss": 0.2624,
      "step": 68
    },
    {
      "epoch": 0.1961969077661276,
      "grad_norm": 1.1210330724716187,
      "learning_rate": 2.660104250532764e-05,
      "loss": 0.2684,
      "step": 69
    },
    {
      "epoch": 0.1990403412120135,
      "grad_norm": 1.145573616027832,
      "learning_rate": 2.5425834453191232e-05,
      "loss": 0.2504,
      "step": 70
    },
    {
      "epoch": 0.20188377465789942,
      "grad_norm": 0.9745646715164185,
      "learning_rate": 2.4285108555160577e-05,
      "loss": 0.249,
      "step": 71
    },
    {
      "epoch": 0.2047272081037853,
      "grad_norm": 1.0583624839782715,
      "learning_rate": 2.3180194846605367e-05,
      "loss": 0.2482,
      "step": 72
    },
    {
      "epoch": 0.20757064154967123,
      "grad_norm": 1.106729507446289,
      "learning_rate": 2.2112381607484417e-05,
      "loss": 0.2899,
      "step": 73
    },
    {
      "epoch": 0.21041407499555714,
      "grad_norm": 1.0823482275009155,
      "learning_rate": 2.1082913860268765e-05,
      "loss": 0.2543,
      "step": 74
    },
    {
      "epoch": 0.21325750844144303,
      "grad_norm": 0.9167066216468811,
      "learning_rate": 2.0092991918301108e-05,
      "loss": 0.2498,
      "step": 75
    },
    {
      "epoch": 0.21325750844144303,
      "eval_loss": 0.26534104347229004,
      "eval_runtime": 4.0551,
      "eval_samples_per_second": 12.33,
      "eval_steps_per_second": 3.206,
      "step": 75
    },
    {
      "epoch": 0.21610094188732895,
      "grad_norm": 1.3717762231826782,
      "learning_rate": 1.91437699862843e-05,
      "loss": 0.2886,
      "step": 76
    },
    {
      "epoch": 0.21894437533321487,
      "grad_norm": 1.5089960098266602,
      "learning_rate": 1.8236354814530112e-05,
      "loss": 0.3046,
      "step": 77
    },
    {
      "epoch": 0.22178780877910076,
      "grad_norm": 1.2876425981521606,
      "learning_rate": 1.7371804408538024e-05,
      "loss": 0.2682,
      "step": 78
    },
    {
      "epoch": 0.22463124222498668,
      "grad_norm": 1.4826658964157104,
      "learning_rate": 1.6551126795408016e-05,
      "loss": 0.2612,
      "step": 79
    },
    {
      "epoch": 0.22747467567087257,
      "grad_norm": 1.2903155088424683,
      "learning_rate": 1.577527884852619e-05,
      "loss": 0.2599,
      "step": 80
    },
    {
      "epoch": 0.23031810911675848,
      "grad_norm": 1.2806668281555176,
      "learning_rate": 1.5045165171893116e-05,
      "loss": 0.2419,
      "step": 81
    },
    {
      "epoch": 0.2331615425626444,
      "grad_norm": 1.1345335245132446,
      "learning_rate": 1.4361637045396029e-05,
      "loss": 0.2579,
      "step": 82
    },
    {
      "epoch": 0.2360049760085303,
      "grad_norm": 1.1329256296157837,
      "learning_rate": 1.3725491432254624e-05,
      "loss": 0.2659,
      "step": 83
    },
    {
      "epoch": 0.2388484094544162,
      "grad_norm": 1.2562272548675537,
      "learning_rate": 1.313747004979751e-05,
      "loss": 0.2676,
      "step": 84
    },
    {
      "epoch": 0.24169184290030213,
      "grad_norm": 0.992720901966095,
      "learning_rate": 1.2598258504653081e-05,
      "loss": 0.2283,
      "step": 85
    },
    {
      "epoch": 0.24453527634618802,
      "grad_norm": 1.2588672637939453,
      "learning_rate": 1.2108485493362765e-05,
      "loss": 0.2456,
      "step": 86
    },
    {
      "epoch": 0.24737870979207394,
      "grad_norm": 0.9704244136810303,
      "learning_rate": 1.1668722069349041e-05,
      "loss": 0.2605,
      "step": 87
    },
    {
      "epoch": 0.2502221432379598,
      "grad_norm": 0.993431031703949,
      "learning_rate": 1.1279480977092635e-05,
      "loss": 0.2861,
      "step": 88
    },
    {
      "epoch": 0.2530655766838457,
      "grad_norm": 1.2297654151916504,
      "learning_rate": 1.094121605429547e-05,
      "loss": 0.2832,
      "step": 89
    },
    {
      "epoch": 0.25590901012973166,
      "grad_norm": 1.498766303062439,
      "learning_rate": 1.0654321702726141e-05,
      "loss": 0.271,
      "step": 90
    },
    {
      "epoch": 0.25875244357561755,
      "grad_norm": 1.326579213142395,
      "learning_rate": 1.0419132428365116e-05,
      "loss": 0.2704,
      "step": 91
    },
    {
      "epoch": 0.26159587702150344,
      "grad_norm": 1.417798399925232,
      "learning_rate": 1.0235922451385733e-05,
      "loss": 0.2509,
      "step": 92
    },
    {
      "epoch": 0.2644393104673894,
      "grad_norm": 1.0304580926895142,
      "learning_rate": 1.0104905386425733e-05,
      "loss": 0.2516,
      "step": 93
    },
    {
      "epoch": 0.2672827439132753,
      "grad_norm": 1.153357982635498,
      "learning_rate": 1.002623399352217e-05,
      "loss": 0.2606,
      "step": 94
    },
    {
      "epoch": 0.27012617735916117,
      "grad_norm": 1.3540794849395752,
      "learning_rate": 1e-05,
      "loss": 0.267,
      "step": 95
    }
  ],
  "logging_steps": 1,
  "max_steps": 95,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.173151075448914e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}