{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.999697285661431, "eval_steps": 500, "global_step": 22296, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.9997567688496474e-05, "loss": 2.9132, "step": 100 }, { "epoch": 0.03, "learning_rate": 4.9990172715142793e-05, "loss": 0.0721, "step": 200 }, { "epoch": 0.04, "learning_rate": 4.997781629993153e-05, "loss": 0.0757, "step": 300 }, { "epoch": 0.05, "learning_rate": 4.9960500896052476e-05, "loss": 0.0599, "step": 400 }, { "epoch": 0.07, "learning_rate": 4.9938477167054135e-05, "loss": 0.0353, "step": 500 }, { "epoch": 0.08, "learning_rate": 4.9911304569050045e-05, "loss": 0.0256, "step": 600 }, { "epoch": 0.09, "learning_rate": 4.987918618733232e-05, "loss": 0.0173, "step": 700 }, { "epoch": 0.11, "learning_rate": 4.98421283985469e-05, "loss": 0.0276, "step": 800 }, { "epoch": 0.12, "learning_rate": 4.9800138559988466e-05, "loss": 0.0225, "step": 900 }, { "epoch": 0.13, "learning_rate": 4.97532250081397e-05, "loss": 0.026, "step": 1000 }, { "epoch": 0.15, "learning_rate": 4.970139705701628e-05, "loss": 0.0271, "step": 1100 }, { "epoch": 0.16, "learning_rate": 4.9644664996317616e-05, "loss": 0.0231, "step": 1200 }, { "epoch": 0.17, "learning_rate": 4.958304008938407e-05, "loss": 0.0196, "step": 1300 }, { "epoch": 0.19, "learning_rate": 4.951653457096072e-05, "loss": 0.0124, "step": 1400 }, { "epoch": 0.2, "learning_rate": 4.944516164476834e-05, "loss": 0.0159, "step": 1500 }, { "epoch": 0.22, "learning_rate": 4.9368935480882034e-05, "loss": 0.0144, "step": 1600 }, { "epoch": 0.23, "learning_rate": 4.9287871212917866e-05, "loss": 0.0182, "step": 1700 }, { "epoch": 0.24, "learning_rate": 4.9201984935028426e-05, "loss": 0.018, "step": 1800 }, { "epoch": 0.26, "learning_rate": 4.911129369870746e-05, "loss": 0.0156, "step": 1900 }, { "epoch": 0.27, "learning_rate": 4.90158155094046e-05, "loss": 0.0146, "step": 2000 }, { "epoch": 0.28, "learning_rate": 4.8915569322950615e-05, "loss": 0.0196, "step": 2100 }, { "epoch": 0.3, "learning_rate": 4.8810575041794e-05, "loss": 0.02, "step": 2200 }, { "epoch": 0.31, "learning_rate": 4.8700853511049656e-05, "loss": 0.0143, "step": 2300 }, { "epoch": 0.32, "learning_rate": 4.858642651436035e-05, "loss": 0.0111, "step": 2400 }, { "epoch": 0.34, "learning_rate": 4.846731676957191e-05, "loss": 0.0179, "step": 2500 }, { "epoch": 0.35, "learning_rate": 4.834354792422293e-05, "loss": 0.0133, "step": 2600 }, { "epoch": 0.36, "learning_rate": 4.821514455084985e-05, "loss": 0.0112, "step": 2700 }, { "epoch": 0.38, "learning_rate": 4.8082132142108465e-05, "loss": 0.0131, "step": 2800 }, { "epoch": 0.39, "learning_rate": 4.794453710571272e-05, "loss": 0.0134, "step": 2900 }, { "epoch": 0.4, "learning_rate": 4.780238675919182e-05, "loss": 0.0084, "step": 3000 }, { "epoch": 0.42, "learning_rate": 4.765570932446672e-05, "loss": 0.0069, "step": 3100 }, { "epoch": 0.43, "learning_rate": 4.75045339222471e-05, "loss": 0.0186, "step": 3200 }, { "epoch": 0.44, "learning_rate": 4.734889056624983e-05, "loss": 0.0138, "step": 3300 }, { "epoch": 0.46, "learning_rate": 4.718881015724017e-05, "loss": 0.012, "step": 3400 }, { "epoch": 0.47, "learning_rate": 4.702432447689692e-05, "loss": 0.0121, "step": 3500 }, { "epoch": 0.48, "learning_rate": 4.6855466181502544e-05, "loss": 0.0104, "step": 3600 }, { "epoch": 0.5, "learning_rate": 4.66822687954598e-05, "loss": 0.014, "step": 3700 }, { "epoch": 0.51, "learning_rate": 4.65047667046359e-05, "loss": 0.0085, "step": 3800 }, { "epoch": 0.52, "learning_rate": 4.632299514953571e-05, "loss": 0.0111, "step": 3900 }, { "epoch": 0.54, "learning_rate": 4.613699021830524e-05, "loss": 0.0122, "step": 4000 }, { "epoch": 0.55, "learning_rate": 4.59467888395669e-05, "loss": 0.0082, "step": 4100 }, { "epoch": 0.57, "learning_rate": 4.575242877508777e-05, "loss": 0.0087, "step": 4200 }, { "epoch": 0.58, "learning_rate": 4.5553948612282607e-05, "loss": 0.0115, "step": 4300 }, { "epoch": 0.59, "learning_rate": 4.5351387756552846e-05, "loss": 0.0069, "step": 4400 }, { "epoch": 0.61, "learning_rate": 4.51447864234632e-05, "loss": 0.0099, "step": 4500 }, { "epoch": 0.62, "learning_rate": 4.4934185630757484e-05, "loss": 0.0087, "step": 4600 }, { "epoch": 0.63, "learning_rate": 4.4719627190215064e-05, "loss": 0.0096, "step": 4700 }, { "epoch": 0.65, "learning_rate": 4.450115369934976e-05, "loss": 0.0122, "step": 4800 }, { "epoch": 0.66, "learning_rate": 4.427880853295274e-05, "loss": 0.0076, "step": 4900 }, { "epoch": 0.67, "learning_rate": 4.4052635834481025e-05, "loss": 0.0071, "step": 5000 }, { "epoch": 0.69, "learning_rate": 4.3822680507293455e-05, "loss": 0.0105, "step": 5100 }, { "epoch": 0.7, "learning_rate": 4.358898820573581e-05, "loss": 0.0108, "step": 5200 }, { "epoch": 0.71, "learning_rate": 4.3351605326076724e-05, "loss": 0.0063, "step": 5300 }, { "epoch": 0.73, "learning_rate": 4.3110578997296416e-05, "loss": 0.0147, "step": 5400 }, { "epoch": 0.74, "learning_rate": 4.286595707172986e-05, "loss": 0.0135, "step": 5500 }, { "epoch": 0.75, "learning_rate": 4.261778811556646e-05, "loss": 0.0105, "step": 5600 }, { "epoch": 0.77, "learning_rate": 4.236612139920786e-05, "loss": 0.0131, "step": 5700 }, { "epoch": 0.78, "learning_rate": 4.2111006887486035e-05, "loss": 0.0085, "step": 5800 }, { "epoch": 0.79, "learning_rate": 4.185249522974346e-05, "loss": 0.0052, "step": 5900 }, { "epoch": 0.81, "learning_rate": 4.159063774977748e-05, "loss": 0.008, "step": 6000 }, { "epoch": 0.82, "learning_rate": 4.1325486435650625e-05, "loss": 0.011, "step": 6100 }, { "epoch": 0.83, "learning_rate": 4.105709392936914e-05, "loss": 0.0074, "step": 6200 }, { "epoch": 0.85, "learning_rate": 4.0785513516431705e-05, "loss": 0.0147, "step": 6300 }, { "epoch": 0.86, "learning_rate": 4.051079911525031e-05, "loss": 0.0049, "step": 6400 }, { "epoch": 0.87, "learning_rate": 4.023300526644557e-05, "loss": 0.0075, "step": 6500 }, { "epoch": 0.89, "learning_rate": 3.995501009115527e-05, "loss": 0.0114, "step": 6600 }, { "epoch": 0.9, "learning_rate": 3.967125281105033e-05, "loss": 0.0089, "step": 6700 }, { "epoch": 0.91, "learning_rate": 3.93845827632495e-05, "loss": 0.0066, "step": 6800 }, { "epoch": 0.93, "learning_rate": 3.909505686199625e-05, "loss": 0.0079, "step": 6900 }, { "epoch": 0.94, "learning_rate": 3.880273258852296e-05, "loss": 0.008, "step": 7000 }, { "epoch": 0.96, "learning_rate": 3.850766797963886e-05, "loss": 0.0068, "step": 7100 }, { "epoch": 0.97, "learning_rate": 3.8209921616207645e-05, "loss": 0.0069, "step": 7200 }, { "epoch": 0.98, "learning_rate": 3.790955261151704e-05, "loss": 0.0088, "step": 7300 }, { "epoch": 1.0, "learning_rate": 3.7606620599542756e-05, "loss": 0.0078, "step": 7400 }, { "epoch": 1.01, "learning_rate": 3.730118572310899e-05, "loss": 0.008, "step": 7500 }, { "epoch": 1.02, "learning_rate": 3.699330862194794e-05, "loss": 0.0057, "step": 7600 }, { "epoch": 1.04, "learning_rate": 3.668305042066061e-05, "loss": 0.0046, "step": 7700 }, { "epoch": 1.05, "learning_rate": 3.637047271658145e-05, "loss": 0.0059, "step": 7800 }, { "epoch": 1.06, "learning_rate": 3.605563756754904e-05, "loss": 0.0046, "step": 7900 }, { "epoch": 1.08, "learning_rate": 3.573860747958544e-05, "loss": 0.0085, "step": 8000 }, { "epoch": 1.09, "learning_rate": 3.541944539448648e-05, "loss": 0.0064, "step": 8100 }, { "epoch": 1.1, "learning_rate": 3.509821467732553e-05, "loss": 0.0064, "step": 8200 }, { "epoch": 1.12, "learning_rate": 3.477822117325554e-05, "loss": 0.0061, "step": 8300 }, { "epoch": 1.13, "learning_rate": 3.445306400520726e-05, "loss": 0.0087, "step": 8400 }, { "epoch": 1.14, "learning_rate": 3.4126030066319e-05, "loss": 0.0042, "step": 8500 }, { "epoch": 1.16, "learning_rate": 3.379718428450832e-05, "loss": 0.0071, "step": 8600 }, { "epoch": 1.17, "learning_rate": 3.346659194740827e-05, "loss": 0.0043, "step": 8700 }, { "epoch": 1.18, "learning_rate": 3.313431868940551e-05, "loss": 0.0062, "step": 8800 }, { "epoch": 1.2, "learning_rate": 3.280043047860958e-05, "loss": 0.0096, "step": 8900 }, { "epoch": 1.21, "learning_rate": 3.24649936037558e-05, "loss": 0.0049, "step": 9000 }, { "epoch": 1.22, "learning_rate": 3.21280746610446e-05, "loss": 0.0043, "step": 9100 }, { "epoch": 1.24, "learning_rate": 3.1789740540919856e-05, "loss": 0.006, "step": 9200 }, { "epoch": 1.25, "learning_rate": 3.145005841478868e-05, "loss": 0.0053, "step": 9300 }, { "epoch": 1.26, "learning_rate": 3.1109095721685525e-05, "loss": 0.0075, "step": 9400 }, { "epoch": 1.28, "learning_rate": 3.076692015488315e-05, "loss": 0.0048, "step": 9500 }, { "epoch": 1.29, "learning_rate": 3.0423599648453037e-05, "loss": 0.0063, "step": 9600 }, { "epoch": 1.31, "learning_rate": 3.007920236377807e-05, "loss": 0.0075, "step": 9700 }, { "epoch": 1.32, "learning_rate": 2.9733796676020104e-05, "loss": 0.0082, "step": 9800 }, { "epoch": 1.33, "learning_rate": 2.938745116054501e-05, "loss": 0.0054, "step": 9900 }, { "epoch": 1.35, "learning_rate": 2.9040234579308025e-05, "loss": 0.0042, "step": 10000 }, { "epoch": 1.36, "learning_rate": 2.8692215867202042e-05, "loss": 0.0112, "step": 10100 }, { "epoch": 1.37, "learning_rate": 2.8343464118371603e-05, "loss": 0.0059, "step": 10200 }, { "epoch": 1.39, "learning_rate": 2.7994048572495225e-05, "loss": 0.0034, "step": 10300 }, { "epoch": 1.4, "learning_rate": 2.7644038601038813e-05, "loss": 0.0082, "step": 10400 }, { "epoch": 1.41, "learning_rate": 2.7293503693483008e-05, "loss": 0.0047, "step": 10500 }, { "epoch": 1.43, "learning_rate": 2.6942513443527008e-05, "loss": 0.0024, "step": 10600 }, { "epoch": 1.44, "learning_rate": 2.6591137535271742e-05, "loss": 0.0041, "step": 10700 }, { "epoch": 1.45, "learning_rate": 2.6239445729385104e-05, "loss": 0.0041, "step": 10800 }, { "epoch": 1.47, "learning_rate": 2.5887507849251914e-05, "loss": 0.0059, "step": 10900 }, { "epoch": 1.48, "learning_rate": 2.5535393767111533e-05, "loss": 0.0032, "step": 11000 }, { "epoch": 1.49, "learning_rate": 2.5183173390185736e-05, "loss": 0.004, "step": 11100 }, { "epoch": 1.51, "learning_rate": 2.483091664679959e-05, "loss": 0.0039, "step": 11200 }, { "epoch": 1.52, "learning_rate": 2.44786934724982e-05, "loss": 0.004, "step": 11300 }, { "epoch": 1.53, "learning_rate": 2.4126573796162015e-05, "loss": 0.0033, "step": 11400 }, { "epoch": 1.55, "learning_rate": 2.377462752612344e-05, "loss": 0.0091, "step": 11500 }, { "epoch": 1.56, "learning_rate": 2.3422924536287548e-05, "loss": 0.0049, "step": 11600 }, { "epoch": 1.57, "learning_rate": 2.307153465225958e-05, "loss": 0.0048, "step": 11700 }, { "epoch": 1.59, "learning_rate": 2.272052763748209e-05, "loss": 0.0024, "step": 11800 }, { "epoch": 1.6, "learning_rate": 2.2369973179384445e-05, "loss": 0.0058, "step": 11900 }, { "epoch": 1.61, "learning_rate": 2.201994087554733e-05, "loss": 0.0098, "step": 12000 }, { "epoch": 1.63, "learning_rate": 2.167050021988514e-05, "loss": 0.0063, "step": 12100 }, { "epoch": 1.64, "learning_rate": 2.1321720588848995e-05, "loss": 0.0035, "step": 12200 }, { "epoch": 1.65, "learning_rate": 2.097367122765301e-05, "loss": 0.0075, "step": 12300 }, { "epoch": 1.67, "learning_rate": 2.0626421236526656e-05, "loss": 0.0048, "step": 12400 }, { "epoch": 1.68, "learning_rate": 2.0280039556995902e-05, "loss": 0.0036, "step": 12500 }, { "epoch": 1.7, "learning_rate": 1.9934594958195834e-05, "loss": 0.0039, "step": 12600 }, { "epoch": 1.71, "learning_rate": 1.9590156023217553e-05, "loss": 0.0045, "step": 12700 }, { "epoch": 1.72, "learning_rate": 1.9250219243814994e-05, "loss": 0.0051, "step": 12800 }, { "epoch": 1.74, "learning_rate": 1.8907984814652774e-05, "loss": 0.0033, "step": 12900 }, { "epoch": 1.75, "learning_rate": 1.8566959868093257e-05, "loss": 0.0053, "step": 13000 }, { "epoch": 1.76, "learning_rate": 1.8227212109768717e-05, "loss": 0.0018, "step": 13100 }, { "epoch": 1.78, "learning_rate": 1.7888808991743948e-05, "loss": 0.0056, "step": 13200 }, { "epoch": 1.79, "learning_rate": 1.7551817699124613e-05, "loss": 0.0054, "step": 13300 }, { "epoch": 1.8, "learning_rate": 1.721630513671862e-05, "loss": 0.0087, "step": 13400 }, { "epoch": 1.82, "learning_rate": 1.6882337915753093e-05, "loss": 0.0054, "step": 13500 }, { "epoch": 1.83, "learning_rate": 1.65499823406497e-05, "loss": 0.0045, "step": 13600 }, { "epoch": 1.84, "learning_rate": 1.621930439586083e-05, "loss": 0.0043, "step": 13700 }, { "epoch": 1.86, "learning_rate": 1.5890369732769356e-05, "loss": 0.0031, "step": 13800 }, { "epoch": 1.87, "learning_rate": 1.5563243656654454e-05, "loss": 0.0038, "step": 13900 }, { "epoch": 1.88, "learning_rate": 1.5237991113726185e-05, "loss": 0.0018, "step": 14000 }, { "epoch": 1.9, "learning_rate": 1.4914676678231354e-05, "loss": 0.0073, "step": 14100 }, { "epoch": 1.91, "learning_rate": 1.4593364539633242e-05, "loss": 0.0049, "step": 14200 }, { "epoch": 1.92, "learning_rate": 1.4274118489867638e-05, "loss": 0.0042, "step": 14300 }, { "epoch": 1.94, "learning_rate": 1.3957001910677942e-05, "loss": 0.0047, "step": 14400 }, { "epoch": 1.95, "learning_rate": 1.3642077761031574e-05, "loss": 0.0035, "step": 14500 }, { "epoch": 1.96, "learning_rate": 1.3329408564620358e-05, "loss": 0.0054, "step": 14600 }, { "epoch": 1.98, "learning_rate": 1.3019056397447415e-05, "loss": 0.0023, "step": 14700 }, { "epoch": 1.99, "learning_rate": 1.2711082875502745e-05, "loss": 0.0057, "step": 14800 }, { "epoch": 2.0, "learning_rate": 1.2405549142530348e-05, "loss": 0.0029, "step": 14900 }, { "epoch": 2.02, "learning_rate": 1.2102515857888976e-05, "loss": 0.0027, "step": 15000 }, { "epoch": 2.03, "learning_rate": 1.1802043184509044e-05, "loss": 0.0032, "step": 15100 }, { "epoch": 2.05, "learning_rate": 1.1504190776948205e-05, "loss": 0.0019, "step": 15200 }, { "epoch": 2.06, "learning_rate": 1.1209017769547758e-05, "loss": 0.0009, "step": 15300 }, { "epoch": 2.07, "learning_rate": 1.0916582764692346e-05, "loss": 0.0009, "step": 15400 }, { "epoch": 2.09, "learning_rate": 1.062694382117534e-05, "loss": 0.0009, "step": 15500 }, { "epoch": 2.1, "learning_rate": 1.0340158442672049e-05, "loss": 0.0012, "step": 15600 }, { "epoch": 2.11, "learning_rate": 1.005910772256064e-05, "loss": 0.0074, "step": 15700 }, { "epoch": 2.13, "learning_rate": 9.778169762000127e-06, "loss": 0.0012, "step": 15800 }, { "epoch": 2.14, "learning_rate": 9.500253878419744e-06, "loss": 0.0023, "step": 15900 }, { "epoch": 2.15, "learning_rate": 9.225415248048676e-06, "loss": 0.0013, "step": 16000 }, { "epoch": 2.17, "learning_rate": 8.956409824144358e-06, "loss": 0.0031, "step": 16100 }, { "epoch": 2.18, "learning_rate": 8.687856651695316e-06, "loss": 0.0029, "step": 16200 }, { "epoch": 2.19, "learning_rate": 8.422542022321477e-06, "loss": 0.0038, "step": 16300 }, { "epoch": 2.21, "learning_rate": 8.160518610453013e-06, "loss": 0.0032, "step": 16400 }, { "epoch": 2.22, "learning_rate": 7.901838437095879e-06, "loss": 0.0005, "step": 16500 }, { "epoch": 2.23, "learning_rate": 7.646552859503738e-06, "loss": 0.0021, "step": 16600 }, { "epoch": 2.25, "learning_rate": 7.3947125609818005e-06, "loss": 0.0007, "step": 16700 }, { "epoch": 2.26, "learning_rate": 7.146367540824281e-06, "loss": 0.002, "step": 16800 }, { "epoch": 2.27, "learning_rate": 6.9015671043878185e-06, "loss": 0.0028, "step": 16900 }, { "epoch": 2.29, "learning_rate": 6.660359853302589e-06, "loss": 0.0026, "step": 17000 }, { "epoch": 2.3, "learning_rate": 6.422793675823113e-06, "loss": 0.0004, "step": 17100 }, { "epoch": 2.31, "learning_rate": 6.188915737320777e-06, "loss": 0.0018, "step": 17200 }, { "epoch": 2.33, "learning_rate": 5.958772470919794e-06, "loss": 0.0043, "step": 17300 }, { "epoch": 2.34, "learning_rate": 5.732409568278555e-06, "loss": 0.0022, "step": 17400 }, { "epoch": 2.35, "learning_rate": 5.509871970518238e-06, "loss": 0.0002, "step": 17500 }, { "epoch": 2.37, "learning_rate": 5.291203859300376e-06, "loss": 0.0003, "step": 17600 }, { "epoch": 2.38, "learning_rate": 5.076448648055207e-06, "loss": 0.0014, "step": 17700 }, { "epoch": 2.39, "learning_rate": 4.86564897336258e-06, "loss": 0.0012, "step": 17800 }, { "epoch": 2.41, "learning_rate": 4.658846686487034e-06, "loss": 0.0035, "step": 17900 }, { "epoch": 2.42, "learning_rate": 4.4560828450688815e-06, "loss": 0.0014, "step": 18000 }, { "epoch": 2.44, "learning_rate": 4.2573977049727574e-06, "loss": 0.0012, "step": 18100 }, { "epoch": 2.45, "learning_rate": 4.06283071229539e-06, "loss": 0.0019, "step": 18200 }, { "epoch": 2.46, "learning_rate": 3.872420495534187e-06, "loss": 0.0022, "step": 18300 }, { "epoch": 2.48, "learning_rate": 3.686204857918046e-06, "loss": 0.0007, "step": 18400 }, { "epoch": 2.49, "learning_rate": 3.5042207699021e-06, "loss": 0.0044, "step": 18500 }, { "epoch": 2.5, "learning_rate": 3.3265043618277455e-06, "loss": 0.0018, "step": 18600 }, { "epoch": 2.52, "learning_rate": 3.153090916749471e-06, "loss": 0.0003, "step": 18700 }, { "epoch": 2.53, "learning_rate": 2.9840148634299374e-06, "loss": 0.0007, "step": 18800 }, { "epoch": 2.54, "learning_rate": 2.8193097695046076e-06, "loss": 0.0017, "step": 18900 }, { "epoch": 2.56, "learning_rate": 2.6590083348174006e-06, "loss": 0.0008, "step": 19000 }, { "epoch": 2.57, "learning_rate": 2.503142384928589e-06, "loss": 0.0013, "step": 19100 }, { "epoch": 2.58, "learning_rate": 2.351742864796275e-06, "loss": 0.0008, "step": 19200 }, { "epoch": 2.6, "learning_rate": 2.204839832632749e-06, "loss": 0.0025, "step": 19300 }, { "epoch": 2.61, "learning_rate": 2.062462453936828e-06, "loss": 0.0039, "step": 19400 }, { "epoch": 2.62, "learning_rate": 1.9246389957034705e-06, "loss": 0.0008, "step": 19500 }, { "epoch": 2.64, "learning_rate": 1.7913968208117833e-06, "loss": 0.0018, "step": 19600 }, { "epoch": 2.65, "learning_rate": 1.664025834061389e-06, "loss": 0.0023, "step": 19700 }, { "epoch": 2.66, "learning_rate": 1.53997821489342e-06, "loss": 0.0035, "step": 19800 }, { "epoch": 2.68, "learning_rate": 1.4205882479710276e-06, "loss": 0.0034, "step": 19900 }, { "epoch": 2.69, "learning_rate": 1.3058796364669029e-06, "loss": 0.0024, "step": 20000 }, { "epoch": 2.7, "learning_rate": 1.1958751541374602e-06, "loss": 0.0012, "step": 20100 }, { "epoch": 2.72, "learning_rate": 1.0905966408014807e-06, "loss": 0.0017, "step": 20200 }, { "epoch": 2.73, "learning_rate": 9.90064998004095e-07, "loss": 0.0034, "step": 20300 }, { "epoch": 2.74, "learning_rate": 8.943001848670724e-07, "loss": 0.0009, "step": 20400 }, { "epoch": 2.76, "learning_rate": 8.033212141262653e-07, "loss": 0.0019, "step": 20500 }, { "epoch": 2.77, "learning_rate": 7.171461483568537e-07, "loss": 0.0034, "step": 20600 }, { "epoch": 2.78, "learning_rate": 6.357920963873054e-07, "loss": 0.0037, "step": 20700 }, { "epoch": 2.8, "learning_rate": 5.592752099026599e-07, "loss": 0.0008, "step": 20800 }, { "epoch": 2.81, "learning_rate": 4.876106802378377e-07, "loss": 0.0005, "step": 20900 }, { "epoch": 2.83, "learning_rate": 4.2081273536161424e-07, "loss": 0.0017, "step": 21000 }, { "epoch": 2.84, "learning_rate": 3.5889463705187367e-07, "loss": 0.0016, "step": 21100 }, { "epoch": 2.85, "learning_rate": 3.018686782626512e-07, "loss": 0.0013, "step": 21200 }, { "epoch": 2.87, "learning_rate": 2.5024309873995513e-07, "loss": 0.0017, "step": 21300 }, { "epoch": 2.88, "learning_rate": 2.0298522443275014e-07, "loss": 0.001, "step": 21400 }, { "epoch": 2.89, "learning_rate": 1.6065044323308464e-07, "loss": 0.0014, "step": 21500 }, { "epoch": 2.91, "learning_rate": 1.2324716010710114e-07, "loss": 0.002, "step": 21600 }, { "epoch": 2.92, "learning_rate": 9.078280094243219e-08, "loss": 0.0013, "step": 21700 }, { "epoch": 2.93, "learning_rate": 6.326381107389646e-08, "loss": 0.0013, "step": 21800 }, { "epoch": 2.95, "learning_rate": 4.0695654003883313e-08, "loss": 0.0028, "step": 21900 }, { "epoch": 2.96, "learning_rate": 2.308281031763726e-08, "loss": 0.0023, "step": 22000 }, { "epoch": 2.97, "learning_rate": 1.0428776793702798e-08, "loss": 0.0016, "step": 22100 }, { "epoch": 2.99, "learning_rate": 2.7360657097047492e-09, "loss": 0.0042, "step": 22200 }, { "epoch": 3.0, "step": 22296, "total_flos": 9.011276227990733e+18, "train_loss": 0.020508435523056583, "train_runtime": 294350.325, "train_samples_per_second": 1.212, "train_steps_per_second": 0.076 } ], "logging_steps": 100, "max_steps": 22296, "num_train_epochs": 3, "save_steps": 3000, "total_flos": 9.011276227990733e+18, "trial_name": null, "trial_params": null }