|
{ |
|
"best_metric": 0.7676969092721835, |
|
"best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-lora-medmnistv2/checkpoint-1090", |
|
"epoch": 9.954337899543379, |
|
"eval_steps": 500, |
|
"global_step": 1090, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.0251981019973755, |
|
"learning_rate": 0.004954128440366973, |
|
"loss": 1.2326, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.6157691478729248, |
|
"learning_rate": 0.004908256880733945, |
|
"loss": 0.9567, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.101731538772583, |
|
"learning_rate": 0.004862385321100918, |
|
"loss": 0.905, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.8456130623817444, |
|
"learning_rate": 0.00481651376146789, |
|
"loss": 0.9132, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.6145790219306946, |
|
"learning_rate": 0.0047706422018348625, |
|
"loss": 0.8798, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.1609084606170654, |
|
"learning_rate": 0.004724770642201835, |
|
"loss": 0.7849, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.7732934355735779, |
|
"learning_rate": 0.004678899082568808, |
|
"loss": 0.7901, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.7393956780433655, |
|
"learning_rate": 0.00463302752293578, |
|
"loss": 0.7932, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.667579174041748, |
|
"learning_rate": 0.0045871559633027525, |
|
"loss": 0.7811, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.8475561141967773, |
|
"learning_rate": 0.004541284403669725, |
|
"loss": 0.7579, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7427716849451645, |
|
"eval_f1": 0.3926656517433595, |
|
"eval_loss": 0.7045032978057861, |
|
"eval_precision": 0.5204175676877641, |
|
"eval_recall": 0.3710290953850673, |
|
"eval_runtime": 5.2715, |
|
"eval_samples_per_second": 190.268, |
|
"eval_steps_per_second": 11.951, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.7959792613983154, |
|
"learning_rate": 0.004495412844036698, |
|
"loss": 0.8206, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.823697566986084, |
|
"learning_rate": 0.0044495412844036695, |
|
"loss": 0.7631, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.7514823079109192, |
|
"learning_rate": 0.004403669724770643, |
|
"loss": 0.7404, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 1.4910705089569092, |
|
"learning_rate": 0.004357798165137615, |
|
"loss": 0.7533, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 1.1586723327636719, |
|
"learning_rate": 0.004311926605504587, |
|
"loss": 0.7781, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 1.106838583946228, |
|
"learning_rate": 0.0042660550458715595, |
|
"loss": 0.7337, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 1.4145853519439697, |
|
"learning_rate": 0.004220183486238533, |
|
"loss": 0.7871, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 1.0210392475128174, |
|
"learning_rate": 0.004174311926605505, |
|
"loss": 0.827, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 1.4612568616867065, |
|
"learning_rate": 0.004128440366972477, |
|
"loss": 0.8952, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 2.333279848098755, |
|
"learning_rate": 0.00408256880733945, |
|
"loss": 0.8019, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 1.9485087394714355, |
|
"learning_rate": 0.004036697247706422, |
|
"loss": 0.7689, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7278165503489531, |
|
"eval_f1": 0.3573330762987477, |
|
"eval_loss": 0.75123530626297, |
|
"eval_precision": 0.3964031496153109, |
|
"eval_recall": 0.35266989798605725, |
|
"eval_runtime": 5.3783, |
|
"eval_samples_per_second": 186.49, |
|
"eval_steps_per_second": 11.714, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 1.6179887056350708, |
|
"learning_rate": 0.003990825688073394, |
|
"loss": 0.8527, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 1.0684826374053955, |
|
"learning_rate": 0.00394954128440367, |
|
"loss": 0.8174, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 0.6670346260070801, |
|
"learning_rate": 0.003903669724770642, |
|
"loss": 0.7894, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 1.4650906324386597, |
|
"learning_rate": 0.003857798165137615, |
|
"loss": 0.7712, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 1.9125275611877441, |
|
"learning_rate": 0.003811926605504587, |
|
"loss": 0.8228, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 2.175058603286743, |
|
"learning_rate": 0.0037706422018348625, |
|
"loss": 0.7592, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 1.2160431146621704, |
|
"learning_rate": 0.003724770642201835, |
|
"loss": 0.7986, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 0.9580293893814087, |
|
"learning_rate": 0.0036788990825688075, |
|
"loss": 0.7767, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 1.544980525970459, |
|
"learning_rate": 0.0036330275229357802, |
|
"loss": 0.7882, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 2.8008100986480713, |
|
"learning_rate": 0.003587155963302752, |
|
"loss": 0.8818, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 3.061676025390625, |
|
"learning_rate": 0.003541284403669725, |
|
"loss": 0.7353, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7357926221335992, |
|
"eval_f1": 0.40020878247545494, |
|
"eval_loss": 0.7191066145896912, |
|
"eval_precision": 0.4630387632736769, |
|
"eval_recall": 0.4201909974743698, |
|
"eval_runtime": 5.4433, |
|
"eval_samples_per_second": 184.263, |
|
"eval_steps_per_second": 11.574, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 6.1841206550598145, |
|
"learning_rate": 0.003495412844036697, |
|
"loss": 0.762, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"grad_norm": 2.103243589401245, |
|
"learning_rate": 0.00344954128440367, |
|
"loss": 0.798, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 2.337041139602661, |
|
"learning_rate": 0.0034082568807339447, |
|
"loss": 0.7817, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"grad_norm": 3.2973079681396484, |
|
"learning_rate": 0.0033669724770642204, |
|
"loss": 0.7669, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"grad_norm": 1.7525138854980469, |
|
"learning_rate": 0.003321100917431193, |
|
"loss": 0.8788, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"grad_norm": 3.467615842819214, |
|
"learning_rate": 0.003275229357798165, |
|
"loss": 0.9385, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"grad_norm": 2.560269355773926, |
|
"learning_rate": 0.0032293577981651377, |
|
"loss": 0.9773, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"grad_norm": 0.9047974348068237, |
|
"learning_rate": 0.00318348623853211, |
|
"loss": 0.948, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"grad_norm": 0.7493894100189209, |
|
"learning_rate": 0.0031376146788990827, |
|
"loss": 0.9429, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"grad_norm": 0.787327229976654, |
|
"learning_rate": 0.003091743119266055, |
|
"loss": 0.9193, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"grad_norm": 0.5133536458015442, |
|
"learning_rate": 0.0030458715596330278, |
|
"loss": 0.8429, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6809571286141576, |
|
"eval_f1": 0.18506746606597574, |
|
"eval_loss": 0.7857978940010071, |
|
"eval_precision": 0.4280081257540274, |
|
"eval_recall": 0.1812976208760752, |
|
"eval_runtime": 5.4671, |
|
"eval_samples_per_second": 183.46, |
|
"eval_steps_per_second": 11.523, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 1.170052170753479, |
|
"learning_rate": 0.003, |
|
"loss": 0.7773, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"grad_norm": 1.006057858467102, |
|
"learning_rate": 0.002954128440366973, |
|
"loss": 0.8049, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"grad_norm": 0.5809192657470703, |
|
"learning_rate": 0.002908256880733945, |
|
"loss": 0.81, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"grad_norm": 0.7377546429634094, |
|
"learning_rate": 0.002862385321100918, |
|
"loss": 0.7943, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"grad_norm": 0.5214153528213501, |
|
"learning_rate": 0.0028165137614678897, |
|
"loss": 0.7664, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"grad_norm": 0.615777313709259, |
|
"learning_rate": 0.0027706422018348624, |
|
"loss": 0.7769, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"grad_norm": 0.5365859866142273, |
|
"learning_rate": 0.0027247706422018347, |
|
"loss": 0.8232, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"grad_norm": 0.7110781669616699, |
|
"learning_rate": 0.0026788990825688075, |
|
"loss": 0.7755, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"grad_norm": 0.8415727615356445, |
|
"learning_rate": 0.0026330275229357798, |
|
"loss": 0.8154, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"grad_norm": 0.7670438289642334, |
|
"learning_rate": 0.0025871559633027525, |
|
"loss": 0.7386, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"grad_norm": 0.8495368957519531, |
|
"learning_rate": 0.002541284403669725, |
|
"loss": 0.7929, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7218344965104686, |
|
"eval_f1": 0.35225874228381837, |
|
"eval_loss": 0.7013418674468994, |
|
"eval_precision": 0.5157907633577941, |
|
"eval_recall": 0.397101205649215, |
|
"eval_runtime": 5.3335, |
|
"eval_samples_per_second": 188.058, |
|
"eval_steps_per_second": 11.812, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.9542437791824341, |
|
"learning_rate": 0.0024954128440366975, |
|
"loss": 0.75, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"grad_norm": 0.7318850755691528, |
|
"learning_rate": 0.00244954128440367, |
|
"loss": 0.7665, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"grad_norm": 1.0321910381317139, |
|
"learning_rate": 0.0024036697247706426, |
|
"loss": 0.7015, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"grad_norm": 0.786632239818573, |
|
"learning_rate": 0.002357798165137615, |
|
"loss": 0.7756, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"grad_norm": 0.8995092511177063, |
|
"learning_rate": 0.002311926605504587, |
|
"loss": 0.79, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"grad_norm": 0.7932329773902893, |
|
"learning_rate": 0.00226605504587156, |
|
"loss": 0.7069, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"grad_norm": 0.7889924645423889, |
|
"learning_rate": 0.002220183486238532, |
|
"loss": 0.7507, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"grad_norm": 0.8880289793014526, |
|
"learning_rate": 0.002174311926605505, |
|
"loss": 0.7341, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"grad_norm": 0.5282242298126221, |
|
"learning_rate": 0.0021284403669724773, |
|
"loss": 0.685, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"grad_norm": 1.16853666305542, |
|
"learning_rate": 0.0020825688073394496, |
|
"loss": 0.7149, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"grad_norm": 0.6140002012252808, |
|
"learning_rate": 0.0020366972477064223, |
|
"loss": 0.6804, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7607178464606181, |
|
"eval_f1": 0.43914392933644575, |
|
"eval_loss": 0.6822256445884705, |
|
"eval_precision": 0.5010781993513719, |
|
"eval_recall": 0.4239506721942319, |
|
"eval_runtime": 5.2475, |
|
"eval_samples_per_second": 191.137, |
|
"eval_steps_per_second": 12.006, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.7350448369979858, |
|
"learning_rate": 0.0019908256880733946, |
|
"loss": 0.7111, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"grad_norm": 0.6007147431373596, |
|
"learning_rate": 0.001944954128440367, |
|
"loss": 0.6981, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"grad_norm": 0.8717228770256042, |
|
"learning_rate": 0.0018990825688073396, |
|
"loss": 0.666, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"grad_norm": 0.5795207023620605, |
|
"learning_rate": 0.0018532110091743121, |
|
"loss": 0.7206, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"grad_norm": 0.9550521373748779, |
|
"learning_rate": 0.0018073394495412844, |
|
"loss": 0.7388, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"grad_norm": 1.932844877243042, |
|
"learning_rate": 0.001761467889908257, |
|
"loss": 0.7198, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"grad_norm": 0.8446419835090637, |
|
"learning_rate": 0.0017155963302752295, |
|
"loss": 0.6446, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"grad_norm": 1.2699838876724243, |
|
"learning_rate": 0.001669724770642202, |
|
"loss": 0.683, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"grad_norm": 0.8056779503822327, |
|
"learning_rate": 0.0016238532110091745, |
|
"loss": 0.7272, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"grad_norm": 0.8416026830673218, |
|
"learning_rate": 0.0015779816513761468, |
|
"loss": 0.7436, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"grad_norm": 0.8119596242904663, |
|
"learning_rate": 0.0015321100917431193, |
|
"loss": 0.6922, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7666999002991027, |
|
"eval_f1": 0.5226745508907684, |
|
"eval_loss": 0.653312087059021, |
|
"eval_precision": 0.6761986767002692, |
|
"eval_recall": 0.5105511763591389, |
|
"eval_runtime": 5.3357, |
|
"eval_samples_per_second": 187.978, |
|
"eval_steps_per_second": 11.807, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 1.218758225440979, |
|
"learning_rate": 0.0014862385321100919, |
|
"loss": 0.7674, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"grad_norm": 1.0076816082000732, |
|
"learning_rate": 0.0014403669724770644, |
|
"loss": 0.6783, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"grad_norm": 1.2119933366775513, |
|
"learning_rate": 0.0013944954128440369, |
|
"loss": 0.6743, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"grad_norm": 0.7832847237586975, |
|
"learning_rate": 0.0013486238532110092, |
|
"loss": 0.751, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"grad_norm": 0.7728474140167236, |
|
"learning_rate": 0.0013027522935779817, |
|
"loss": 0.7252, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"grad_norm": 1.1566990613937378, |
|
"learning_rate": 0.0012568807339449542, |
|
"loss": 0.6176, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"grad_norm": 0.8185287117958069, |
|
"learning_rate": 0.0012110091743119267, |
|
"loss": 0.6928, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"grad_norm": 1.3969968557357788, |
|
"learning_rate": 0.0011651376146788993, |
|
"loss": 0.6475, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"grad_norm": 1.7943429946899414, |
|
"learning_rate": 0.0011192660550458716, |
|
"loss": 0.6414, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"grad_norm": 1.6549348831176758, |
|
"learning_rate": 0.001073394495412844, |
|
"loss": 0.717, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"grad_norm": 1.016310691833496, |
|
"learning_rate": 0.0010275229357798166, |
|
"loss": 0.6563, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7467597208374875, |
|
"eval_f1": 0.4496255647360247, |
|
"eval_loss": 0.6757794618606567, |
|
"eval_precision": 0.4547903051227771, |
|
"eval_recall": 0.4588581671719845, |
|
"eval_runtime": 5.4168, |
|
"eval_samples_per_second": 185.164, |
|
"eval_steps_per_second": 11.63, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 1.1468579769134521, |
|
"learning_rate": 0.0009816513761467891, |
|
"loss": 0.7141, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"grad_norm": 3.9916632175445557, |
|
"learning_rate": 0.0009357798165137615, |
|
"loss": 0.6384, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"grad_norm": 1.3290263414382935, |
|
"learning_rate": 0.000889908256880734, |
|
"loss": 0.6598, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"grad_norm": 2.3329484462738037, |
|
"learning_rate": 0.0008440366972477065, |
|
"loss": 0.744, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"grad_norm": 1.5532264709472656, |
|
"learning_rate": 0.000798165137614679, |
|
"loss": 0.64, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"grad_norm": 0.8567171692848206, |
|
"learning_rate": 0.0007522935779816515, |
|
"loss": 0.6803, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"grad_norm": 0.8911744356155396, |
|
"learning_rate": 0.0007064220183486239, |
|
"loss": 0.6808, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"grad_norm": 0.8479019999504089, |
|
"learning_rate": 0.0006605504587155964, |
|
"loss": 0.6848, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"grad_norm": 1.6710033416748047, |
|
"learning_rate": 0.0006146788990825688, |
|
"loss": 0.6663, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"grad_norm": 0.7447875142097473, |
|
"learning_rate": 0.0005688073394495413, |
|
"loss": 0.6389, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"grad_norm": 1.6779547929763794, |
|
"learning_rate": 0.0005229357798165138, |
|
"loss": 0.6985, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7647058823529411, |
|
"eval_f1": 0.49146885132144036, |
|
"eval_loss": 0.6264088749885559, |
|
"eval_precision": 0.6450867892155376, |
|
"eval_recall": 0.4691799634422585, |
|
"eval_runtime": 5.2889, |
|
"eval_samples_per_second": 189.641, |
|
"eval_steps_per_second": 11.912, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.9829887747764587, |
|
"learning_rate": 0.00047706422018348627, |
|
"loss": 0.608, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"grad_norm": 0.8272444009780884, |
|
"learning_rate": 0.00043119266055045873, |
|
"loss": 0.6661, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"grad_norm": 1.056658387184143, |
|
"learning_rate": 0.0003853211009174312, |
|
"loss": 0.6982, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"grad_norm": 0.8954740166664124, |
|
"learning_rate": 0.0003394495412844037, |
|
"loss": 0.5824, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"grad_norm": 1.664057731628418, |
|
"learning_rate": 0.0002935779816513762, |
|
"loss": 0.657, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"grad_norm": 0.9140777587890625, |
|
"learning_rate": 0.00024770642201834864, |
|
"loss": 0.5975, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"grad_norm": 1.4085102081298828, |
|
"learning_rate": 0.0002018348623853211, |
|
"loss": 0.6377, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"grad_norm": 0.9538210034370422, |
|
"learning_rate": 0.0001559633027522936, |
|
"loss": 0.6508, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"grad_norm": 1.5182217359542847, |
|
"learning_rate": 0.00011009174311926606, |
|
"loss": 0.6776, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"grad_norm": 2.307227849960327, |
|
"learning_rate": 6.422018348623854e-05, |
|
"loss": 0.6758, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"grad_norm": 0.9896465539932251, |
|
"learning_rate": 1.834862385321101e-05, |
|
"loss": 0.6283, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"eval_accuracy": 0.7676969092721835, |
|
"eval_f1": 0.50879281522548, |
|
"eval_loss": 0.6179494857788086, |
|
"eval_precision": 0.5889156507510112, |
|
"eval_recall": 0.4796382542284182, |
|
"eval_runtime": 5.4809, |
|
"eval_samples_per_second": 182.998, |
|
"eval_steps_per_second": 11.494, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"step": 1090, |
|
"total_flos": 5.442882169274339e+18, |
|
"train_loss": 0.7558124568484245, |
|
"train_runtime": 825.7866, |
|
"train_samples_per_second": 84.852, |
|
"train_steps_per_second": 1.32 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1090, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 5.442882169274339e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|