|
{ |
|
"best_metric": 0.8509230445807242, |
|
"best_model_checkpoint": "result/my-sup-simcse-roberta-base_filtered_final_nli_pos_neg", |
|
"epoch": 3.0, |
|
"global_step": 1542, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"eval_avg_sts": 0.7970172556800076, |
|
"eval_sickr_spearman": 0.75025416187497, |
|
"eval_stsb_spearman": 0.8437803494850452, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_avg_sts": 0.8184611939888191, |
|
"eval_sickr_spearman": 0.787718901307712, |
|
"eval_stsb_spearman": 0.8492034866699262, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_avg_sts": 0.82628745175756, |
|
"eval_sickr_spearman": 0.7880150610791795, |
|
"eval_stsb_spearman": 0.8645598424359405, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_avg_sts": 0.8214663219382123, |
|
"eval_sickr_spearman": 0.7863345008709616, |
|
"eval_stsb_spearman": 0.856598143005463, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_avg_sts": 0.8245359107619952, |
|
"eval_sickr_spearman": 0.7852220525305533, |
|
"eval_stsb_spearman": 0.8638497689934371, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_avg_sts": 0.8369552464117445, |
|
"eval_sickr_spearman": 0.8047760902992264, |
|
"eval_stsb_spearman": 0.8691344025242626, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_avg_sts": 0.8347841673419429, |
|
"eval_sickr_spearman": 0.8065769684165468, |
|
"eval_stsb_spearman": 0.862991366267339, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_avg_sts": 0.8365894758739485, |
|
"eval_sickr_spearman": 0.807168999772873, |
|
"eval_stsb_spearman": 0.866009951975024, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_avg_sts": 0.8373694984089681, |
|
"eval_sickr_spearman": 0.8121874333444314, |
|
"eval_stsb_spearman": 0.8625515634735048, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_avg_sts": 0.8353378734135991, |
|
"eval_sickr_spearman": 0.8028517722512096, |
|
"eval_stsb_spearman": 0.8678239745759886, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_avg_sts": 0.8421542038447196, |
|
"eval_sickr_spearman": 0.8175947747443341, |
|
"eval_stsb_spearman": 0.8667136329451051, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_avg_sts": 0.8360416637090009, |
|
"eval_sickr_spearman": 0.8098268968331538, |
|
"eval_stsb_spearman": 0.8622564305848479, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_avg_sts": 0.8432321348956329, |
|
"eval_sickr_spearman": 0.8172194597176966, |
|
"eval_stsb_spearman": 0.8692448100735692, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_avg_sts": 0.8406118882697353, |
|
"eval_sickr_spearman": 0.8133721684925038, |
|
"eval_stsb_spearman": 0.8678516080469669, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_avg_sts": 0.8364168982681424, |
|
"eval_sickr_spearman": 0.8088663228354931, |
|
"eval_stsb_spearman": 0.8639674737007915, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_avg_sts": 0.8411726277287294, |
|
"eval_sickr_spearman": 0.8214387037924262, |
|
"eval_stsb_spearman": 0.8609065516650325, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_avg_sts": 0.8446725188995174, |
|
"eval_sickr_spearman": 0.824545739682255, |
|
"eval_stsb_spearman": 0.86479929811678, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_avg_sts": 0.846832980887532, |
|
"eval_sickr_spearman": 0.8305859869058076, |
|
"eval_stsb_spearman": 0.8630799748692564, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_avg_sts": 0.8417173844571794, |
|
"eval_sickr_spearman": 0.8217314533556915, |
|
"eval_stsb_spearman": 0.8617033155586672, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.378728923476006e-05, |
|
"loss": 0.5309, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_avg_sts": 0.8394389881737694, |
|
"eval_sickr_spearman": 0.8176411247572218, |
|
"eval_stsb_spearman": 0.8612368515903169, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_avg_sts": 0.8424515638291671, |
|
"eval_sickr_spearman": 0.8244119250336484, |
|
"eval_stsb_spearman": 0.8604912026246858, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_avg_sts": 0.843505143147442, |
|
"eval_sickr_spearman": 0.8292789645734737, |
|
"eval_stsb_spearman": 0.8577313217214102, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_avg_sts": 0.843100574356599, |
|
"eval_sickr_spearman": 0.8253128924344243, |
|
"eval_stsb_spearman": 0.8608882562787737, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_avg_sts": 0.8431532651801806, |
|
"eval_sickr_spearman": 0.8231387646278283, |
|
"eval_stsb_spearman": 0.863167765732533, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_avg_sts": 0.8411549297525207, |
|
"eval_sickr_spearman": 0.8223049447068626, |
|
"eval_stsb_spearman": 0.8600049147981788, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_avg_sts": 0.8444909588234402, |
|
"eval_sickr_spearman": 0.8264434965311761, |
|
"eval_stsb_spearman": 0.8625384211157042, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_avg_sts": 0.8442403156760934, |
|
"eval_sickr_spearman": 0.8282549414908131, |
|
"eval_stsb_spearman": 0.8602256898613736, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_avg_sts": 0.8433546951723836, |
|
"eval_sickr_spearman": 0.8271446545499697, |
|
"eval_stsb_spearman": 0.8595647357947974, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_avg_sts": 0.8420306309738643, |
|
"eval_sickr_spearman": 0.8233070175761659, |
|
"eval_stsb_spearman": 0.8607542443715627, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_avg_sts": 0.8391026032092552, |
|
"eval_sickr_spearman": 0.8208057019065734, |
|
"eval_stsb_spearman": 0.8573995045119369, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_avg_sts": 0.8413993052906461, |
|
"eval_sickr_spearman": 0.8210936963907962, |
|
"eval_stsb_spearman": 0.8617049141904959, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_avg_sts": 0.8434142809317196, |
|
"eval_sickr_spearman": 0.8264181361096167, |
|
"eval_stsb_spearman": 0.8604104257538223, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_avg_sts": 0.8445746067631623, |
|
"eval_sickr_spearman": 0.8273452804606768, |
|
"eval_stsb_spearman": 0.8618039330656478, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_avg_sts": 0.8429633955631959, |
|
"eval_sickr_spearman": 0.8270336546745461, |
|
"eval_stsb_spearman": 0.8588931364518456, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_avg_sts": 0.8472516730528594, |
|
"eval_sickr_spearman": 0.8334014259788056, |
|
"eval_stsb_spearman": 0.8611019201269132, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_avg_sts": 0.8509230445807242, |
|
"eval_sickr_spearman": 0.8376631295472063, |
|
"eval_stsb_spearman": 0.8641829596142421, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_avg_sts": 0.8453984948023106, |
|
"eval_sickr_spearman": 0.82835503830621, |
|
"eval_stsb_spearman": 0.8624419512984111, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_avg_sts": 0.8441717460262391, |
|
"eval_sickr_spearman": 0.8250034760789602, |
|
"eval_stsb_spearman": 0.8633400159735181, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_avg_sts": 0.8480020725945073, |
|
"eval_sickr_spearman": 0.8306723468261933, |
|
"eval_stsb_spearman": 0.8653317983628214, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.7574578469520103e-05, |
|
"loss": 0.2714, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_avg_sts": 0.8467505999509635, |
|
"eval_sickr_spearman": 0.8330692909123609, |
|
"eval_stsb_spearman": 0.8604319089895659, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_avg_sts": 0.8494326339830918, |
|
"eval_sickr_spearman": 0.8343606551056261, |
|
"eval_stsb_spearman": 0.8645046128605576, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_avg_sts": 0.8452826795684555, |
|
"eval_sickr_spearman": 0.8291940455861312, |
|
"eval_stsb_spearman": 0.8613713135507798, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_avg_sts": 0.8457791155470658, |
|
"eval_sickr_spearman": 0.8285575854609747, |
|
"eval_stsb_spearman": 0.8630006456331568, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_avg_sts": 0.8447554891917526, |
|
"eval_sickr_spearman": 0.8279863996026727, |
|
"eval_stsb_spearman": 0.8615245787808324, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_avg_sts": 0.847764082068214, |
|
"eval_sickr_spearman": 0.8317096745239522, |
|
"eval_stsb_spearman": 0.8638184896124759, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_avg_sts": 0.8451478500577279, |
|
"eval_sickr_spearman": 0.828890681149448, |
|
"eval_stsb_spearman": 0.8614050189660079, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_avg_sts": 0.8439050801814342, |
|
"eval_sickr_spearman": 0.8240606735888313, |
|
"eval_stsb_spearman": 0.8637494867740371, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_avg_sts": 0.8447525482867402, |
|
"eval_sickr_spearman": 0.8263380682635195, |
|
"eval_stsb_spearman": 0.863167028309961, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_avg_sts": 0.8455355460641325, |
|
"eval_sickr_spearman": 0.8271100241258328, |
|
"eval_stsb_spearman": 0.8639610680024322, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_avg_sts": 0.8467670558910284, |
|
"eval_sickr_spearman": 0.8293558143357747, |
|
"eval_stsb_spearman": 0.8641782974462822, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_avg_sts": 0.8438803647863737, |
|
"eval_sickr_spearman": 0.8270033470495388, |
|
"eval_stsb_spearman": 0.8607573825232087, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_avg_sts": 0.8454224221400783, |
|
"eval_sickr_spearman": 0.8282425974977434, |
|
"eval_stsb_spearman": 0.8626022467824133, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_avg_sts": 0.8449894067547106, |
|
"eval_sickr_spearman": 0.8266484932721141, |
|
"eval_stsb_spearman": 0.8633303202373069, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_avg_sts": 0.8460181390370703, |
|
"eval_sickr_spearman": 0.8278362063484757, |
|
"eval_stsb_spearman": 0.864200071725665, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_avg_sts": 0.8465150060489774, |
|
"eval_sickr_spearman": 0.8285329455059368, |
|
"eval_stsb_spearman": 0.8644970665920182, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_avg_sts": 0.8471917941480929, |
|
"eval_sickr_spearman": 0.8293239697155211, |
|
"eval_stsb_spearman": 0.8650596185806647, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_avg_sts": 0.8469228159218247, |
|
"eval_sickr_spearman": 0.8292969282054116, |
|
"eval_stsb_spearman": 0.8645487036382378, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_avg_sts": 0.846386304453965, |
|
"eval_sickr_spearman": 0.8283193992289432, |
|
"eval_stsb_spearman": 0.864453209678987, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_avg_sts": 0.8456684357631843, |
|
"eval_sickr_spearman": 0.8282710799408963, |
|
"eval_stsb_spearman": 0.8630657915854721, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.3618677042801557e-06, |
|
"loss": 0.2376, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_avg_sts": 0.846184384136446, |
|
"eval_sickr_spearman": 0.8285680082099867, |
|
"eval_stsb_spearman": 0.8638007600629054, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_avg_sts": 0.8465883297884287, |
|
"eval_sickr_spearman": 0.8287545610079723, |
|
"eval_stsb_spearman": 0.864422098568885, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1542, |
|
"train_runtime": 2217.7106, |
|
"train_samples_per_second": 0.695 |
|
} |
|
], |
|
"max_steps": 1542, |
|
"num_train_epochs": 3, |
|
"total_flos": 148906707659089920, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|