{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.999366420274551, "global_step": 8281, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25816988945007324, "epoch": 0.0, "learning_rate": 4.9993962081874176e-05, "loss": 0.2805, "step": 1, "task_loss": 0.5853821039199829 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18578635156154633, "epoch": 0.0, "learning_rate": 4.998792416374834e-05, "loss": 0.1863, "step": 2, "task_loss": 0.16780924797058105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23223145306110382, "epoch": 0.0, "learning_rate": 4.998188624562251e-05, "loss": 0.4064, "step": 3, "task_loss": 0.3836962878704071 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25595200061798096, "epoch": 0.0, "learning_rate": 4.9975848327496685e-05, "loss": 0.3625, "step": 4, "task_loss": 0.6386852860450745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4139005243778229, "epoch": 0.0, "learning_rate": 4.996981040937085e-05, "loss": 0.389, "step": 5, "task_loss": 0.2773173153400421 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17063988745212555, "epoch": 0.01, "learning_rate": 4.996377249124502e-05, "loss": 0.3621, "step": 6, "task_loss": 0.5356371998786926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26443102955818176, "epoch": 0.01, "learning_rate": 4.995773457311919e-05, "loss": 0.3757, "step": 7, "task_loss": 0.3861358165740967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24171142280101776, "epoch": 0.01, "learning_rate": 4.995169665499336e-05, "loss": 0.4133, "step": 8, "task_loss": 0.17129471898078918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2744707465171814, "epoch": 0.01, "learning_rate": 4.994565873686753e-05, "loss": 0.413, "step": 9, "task_loss": 0.3300344944000244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3388405442237854, "epoch": 0.01, "learning_rate": 4.99396208187417e-05, "loss": 0.2507, "step": 10, "task_loss": 0.2072344720363617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.147495299577713, "epoch": 0.01, "learning_rate": 4.993358290061587e-05, "loss": 0.3421, "step": 11, "task_loss": 0.3913913369178772 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41760873794555664, "epoch": 0.01, "learning_rate": 4.9927544982490036e-05, "loss": 0.3769, "step": 12, "task_loss": 0.3634096086025238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4394819140434265, "epoch": 0.01, "learning_rate": 4.992150706436421e-05, "loss": 0.3362, "step": 13, "task_loss": 0.6043516397476196 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38453197479248047, "epoch": 0.01, "learning_rate": 4.9915469146238384e-05, "loss": 0.2789, "step": 14, "task_loss": 0.4792601466178894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3559964597225189, "epoch": 0.01, "learning_rate": 4.9909431228112544e-05, "loss": 0.4384, "step": 15, "task_loss": 0.49861496686935425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2305917739868164, "epoch": 0.01, "learning_rate": 4.990339330998672e-05, "loss": 0.2783, "step": 16, "task_loss": 0.7065786123275757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4214351773262024, "epoch": 0.01, "learning_rate": 4.989735539186089e-05, "loss": 0.3208, "step": 17, "task_loss": 1.549605369567871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49794405698776245, "epoch": 0.02, "learning_rate": 4.989131747373506e-05, "loss": 0.5574, "step": 18, "task_loss": 0.6901516914367676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22809137403964996, "epoch": 0.02, "learning_rate": 4.9885279555609226e-05, "loss": 0.274, "step": 19, "task_loss": 1.07197904586792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49502134323120117, "epoch": 0.02, "learning_rate": 4.98792416374834e-05, "loss": 0.5689, "step": 20, "task_loss": 1.110392689704895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2524433732032776, "epoch": 0.02, "learning_rate": 4.987320371935757e-05, "loss": 0.3597, "step": 21, "task_loss": 0.804271936416626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5214238166809082, "epoch": 0.02, "learning_rate": 4.9867165801231735e-05, "loss": 0.3759, "step": 22, "task_loss": 0.6734863519668579 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1858106255531311, "epoch": 0.02, "learning_rate": 4.986112788310591e-05, "loss": 0.3365, "step": 23, "task_loss": 0.6390823125839233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2975519299507141, "epoch": 0.02, "learning_rate": 4.9855089964980076e-05, "loss": 0.3212, "step": 24, "task_loss": 0.1450793743133545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3863736391067505, "epoch": 0.02, "learning_rate": 4.984905204685424e-05, "loss": 0.4371, "step": 25, "task_loss": 0.8929804563522339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31156808137893677, "epoch": 0.02, "learning_rate": 4.984301412872842e-05, "loss": 0.5147, "step": 26, "task_loss": 0.7455569505691528 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32773831486701965, "epoch": 0.02, "learning_rate": 4.9836976210602584e-05, "loss": 0.3446, "step": 27, "task_loss": 1.1415926218032837 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17569687962532043, "epoch": 0.02, "learning_rate": 4.983093829247676e-05, "loss": 0.2192, "step": 28, "task_loss": 0.20533543825149536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3167559802532196, "epoch": 0.02, "learning_rate": 4.9824900374350925e-05, "loss": 0.3656, "step": 29, "task_loss": 1.0243937969207764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4247322380542755, "epoch": 0.03, "learning_rate": 4.98188624562251e-05, "loss": 0.3238, "step": 30, "task_loss": 0.589907705783844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4397602081298828, "epoch": 0.03, "learning_rate": 4.9812824538099266e-05, "loss": 0.3644, "step": 31, "task_loss": 0.5212509632110596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23409625887870789, "epoch": 0.03, "learning_rate": 4.9806786619973434e-05, "loss": 0.333, "step": 32, "task_loss": 0.7313840985298157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.12164507061243057, "epoch": 0.03, "learning_rate": 4.980074870184761e-05, "loss": 0.4048, "step": 33, "task_loss": 0.7823198437690735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6909140348434448, "epoch": 0.03, "learning_rate": 4.9794710783721775e-05, "loss": 0.4481, "step": 34, "task_loss": 0.6988318562507629 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3984873294830322, "epoch": 0.03, "learning_rate": 4.978867286559594e-05, "loss": 0.4058, "step": 35, "task_loss": 0.48753196001052856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.13683383166790009, "epoch": 0.03, "learning_rate": 4.9782634947470116e-05, "loss": 0.3467, "step": 36, "task_loss": 0.29020625352859497 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2913225293159485, "epoch": 0.03, "learning_rate": 4.977659702934428e-05, "loss": 0.4073, "step": 37, "task_loss": 0.7701352834701538 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44909706711769104, "epoch": 0.03, "learning_rate": 4.977055911121846e-05, "loss": 0.3958, "step": 38, "task_loss": 0.46714961528778076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37656569480895996, "epoch": 0.03, "learning_rate": 4.9764521193092624e-05, "loss": 0.3124, "step": 39, "task_loss": 0.5885592699050903 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2543381452560425, "epoch": 0.03, "learning_rate": 4.975848327496679e-05, "loss": 0.3764, "step": 40, "task_loss": 1.316689372062683 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5124260187149048, "epoch": 0.03, "learning_rate": 4.9752445356840965e-05, "loss": 0.418, "step": 41, "task_loss": 0.5791172385215759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25226742029190063, "epoch": 0.04, "learning_rate": 4.974640743871513e-05, "loss": 0.2663, "step": 42, "task_loss": 0.38406485319137573 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34966787695884705, "epoch": 0.04, "learning_rate": 4.97403695205893e-05, "loss": 0.4449, "step": 43, "task_loss": 1.1614713668823242 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23528718948364258, "epoch": 0.04, "learning_rate": 4.9734331602463474e-05, "loss": 0.3279, "step": 44, "task_loss": 0.4693385064601898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6375081539154053, "epoch": 0.04, "learning_rate": 4.972829368433764e-05, "loss": 0.4547, "step": 45, "task_loss": 0.8986639976501465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5959746837615967, "epoch": 0.04, "learning_rate": 4.9722255766211815e-05, "loss": 0.4487, "step": 46, "task_loss": 0.7575843930244446 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3843839168548584, "epoch": 0.04, "learning_rate": 4.971621784808598e-05, "loss": 0.4116, "step": 47, "task_loss": 0.8450673818588257 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2406119555234909, "epoch": 0.04, "learning_rate": 4.9710179929960156e-05, "loss": 0.3084, "step": 48, "task_loss": 0.16761846840381622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35507768392562866, "epoch": 0.04, "learning_rate": 4.970414201183432e-05, "loss": 0.3447, "step": 49, "task_loss": 0.42451998591423035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21716907620429993, "epoch": 0.04, "learning_rate": 4.969810409370849e-05, "loss": 0.2907, "step": 50, "task_loss": 0.3825681507587433 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30666980147361755, "epoch": 0.04, "learning_rate": 4.9692066175582664e-05, "loss": 0.3688, "step": 51, "task_loss": 0.5329431295394897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22709280252456665, "epoch": 0.04, "learning_rate": 4.968602825745683e-05, "loss": 0.2923, "step": 52, "task_loss": 0.24181126058101654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.287906676530838, "epoch": 0.04, "learning_rate": 4.9679990339331e-05, "loss": 0.3116, "step": 53, "task_loss": 0.24539609253406525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20339882373809814, "epoch": 0.05, "learning_rate": 4.967395242120517e-05, "loss": 0.3187, "step": 54, "task_loss": 0.5900417566299438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2680729627609253, "epoch": 0.05, "learning_rate": 4.966791450307934e-05, "loss": 0.3322, "step": 55, "task_loss": 1.2729666233062744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2620672583580017, "epoch": 0.05, "learning_rate": 4.966187658495351e-05, "loss": 0.3288, "step": 56, "task_loss": 0.7690536975860596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2558075189590454, "epoch": 0.05, "learning_rate": 4.965583866682768e-05, "loss": 0.2878, "step": 57, "task_loss": 0.45123398303985596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47659721970558167, "epoch": 0.05, "learning_rate": 4.9649800748701855e-05, "loss": 0.4099, "step": 58, "task_loss": 0.394521027803421 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19426442682743073, "epoch": 0.05, "learning_rate": 4.9643762830576015e-05, "loss": 0.2888, "step": 59, "task_loss": 0.2497028261423111 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2723541259765625, "epoch": 0.05, "learning_rate": 4.963772491245019e-05, "loss": 0.2959, "step": 60, "task_loss": 0.3907150626182556 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5044282674789429, "epoch": 0.05, "learning_rate": 4.963168699432436e-05, "loss": 0.391, "step": 61, "task_loss": 0.6892834901809692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3972747027873993, "epoch": 0.05, "learning_rate": 4.962564907619853e-05, "loss": 0.3642, "step": 62, "task_loss": 1.03528892993927 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44778820872306824, "epoch": 0.05, "learning_rate": 4.96196111580727e-05, "loss": 0.313, "step": 63, "task_loss": 0.6543607711791992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4266464114189148, "epoch": 0.05, "learning_rate": 4.961357323994687e-05, "loss": 0.4076, "step": 64, "task_loss": 0.6509642004966736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1449139416217804, "epoch": 0.05, "learning_rate": 4.960753532182104e-05, "loss": 0.3039, "step": 65, "task_loss": 0.38982975482940674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3253495693206787, "epoch": 0.06, "learning_rate": 4.9601497403695206e-05, "loss": 0.37, "step": 66, "task_loss": 0.7798401117324829 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35335421562194824, "epoch": 0.06, "learning_rate": 4.959545948556938e-05, "loss": 0.3082, "step": 67, "task_loss": 0.7975855469703674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4480602741241455, "epoch": 0.06, "learning_rate": 4.958942156744355e-05, "loss": 0.3148, "step": 68, "task_loss": 0.18220213055610657 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3367661237716675, "epoch": 0.06, "learning_rate": 4.9583383649317714e-05, "loss": 0.2575, "step": 69, "task_loss": 0.6136961579322815 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5279454588890076, "epoch": 0.06, "learning_rate": 4.957734573119189e-05, "loss": 0.4337, "step": 70, "task_loss": 0.7228941917419434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33608484268188477, "epoch": 0.06, "learning_rate": 4.957130781306606e-05, "loss": 0.3909, "step": 71, "task_loss": 1.099329948425293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29355472326278687, "epoch": 0.06, "learning_rate": 4.956526989494022e-05, "loss": 0.3845, "step": 72, "task_loss": 0.5266224145889282 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31957435607910156, "epoch": 0.06, "learning_rate": 4.9559231976814396e-05, "loss": 0.3339, "step": 73, "task_loss": 0.43144696950912476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4300149977207184, "epoch": 0.06, "learning_rate": 4.955319405868857e-05, "loss": 0.3992, "step": 74, "task_loss": 0.570293664932251 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8164573907852173, "epoch": 0.06, "learning_rate": 4.954715614056273e-05, "loss": 0.4478, "step": 75, "task_loss": 1.3061954975128174 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2138664424419403, "epoch": 0.06, "learning_rate": 4.9541118222436905e-05, "loss": 0.5357, "step": 76, "task_loss": 0.4738396406173706 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20729652047157288, "epoch": 0.07, "learning_rate": 4.953508030431108e-05, "loss": 0.2988, "step": 77, "task_loss": 0.1066388487815857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3723217248916626, "epoch": 0.07, "learning_rate": 4.9529042386185246e-05, "loss": 0.3181, "step": 78, "task_loss": 0.7636824250221252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29050979018211365, "epoch": 0.07, "learning_rate": 4.952300446805941e-05, "loss": 0.3519, "step": 79, "task_loss": 0.48806482553482056 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5898647904396057, "epoch": 0.07, "learning_rate": 4.951696654993359e-05, "loss": 0.3993, "step": 80, "task_loss": 0.43252667784690857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34061485528945923, "epoch": 0.07, "learning_rate": 4.9510928631807754e-05, "loss": 0.3443, "step": 81, "task_loss": 1.4975894689559937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24292877316474915, "epoch": 0.07, "learning_rate": 4.950489071368192e-05, "loss": 0.263, "step": 82, "task_loss": 0.5394057631492615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28718599677085876, "epoch": 0.07, "learning_rate": 4.9498852795556095e-05, "loss": 0.2962, "step": 83, "task_loss": 0.22143614292144775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3874911069869995, "epoch": 0.07, "learning_rate": 4.949281487743026e-05, "loss": 0.3169, "step": 84, "task_loss": 0.4732312560081482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3567826747894287, "epoch": 0.07, "learning_rate": 4.948677695930443e-05, "loss": 0.3102, "step": 85, "task_loss": 1.1696805953979492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25368592143058777, "epoch": 0.07, "learning_rate": 4.9480739041178604e-05, "loss": 0.4811, "step": 86, "task_loss": 0.19513137638568878 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3893371522426605, "epoch": 0.07, "learning_rate": 4.947470112305278e-05, "loss": 0.4123, "step": 87, "task_loss": 0.8287002444267273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27553173899650574, "epoch": 0.07, "learning_rate": 4.9468663204926945e-05, "loss": 0.2527, "step": 88, "task_loss": 0.4779343008995056 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2027340829372406, "epoch": 0.08, "learning_rate": 4.946262528680111e-05, "loss": 0.2572, "step": 89, "task_loss": 0.2104167938232422 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22549833357334137, "epoch": 0.08, "learning_rate": 4.9456587368675286e-05, "loss": 0.324, "step": 90, "task_loss": 0.4265645742416382 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4884544014930725, "epoch": 0.08, "learning_rate": 4.945054945054945e-05, "loss": 0.3642, "step": 91, "task_loss": 0.34880319237709045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6234738230705261, "epoch": 0.08, "learning_rate": 4.944451153242362e-05, "loss": 0.4123, "step": 92, "task_loss": 0.6439784169197083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2962171137332916, "epoch": 0.08, "learning_rate": 4.9438473614297794e-05, "loss": 0.4553, "step": 93, "task_loss": 0.9175675511360168 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3969919681549072, "epoch": 0.08, "learning_rate": 4.943243569617196e-05, "loss": 0.3523, "step": 94, "task_loss": 0.32375988364219666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5437160730361938, "epoch": 0.08, "learning_rate": 4.942639777804613e-05, "loss": 0.469, "step": 95, "task_loss": 0.8413633108139038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34137535095214844, "epoch": 0.08, "learning_rate": 4.94203598599203e-05, "loss": 0.4128, "step": 96, "task_loss": 1.7086763381958008 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2708493769168854, "epoch": 0.08, "learning_rate": 4.941432194179447e-05, "loss": 0.342, "step": 97, "task_loss": 0.5729495286941528 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3845900297164917, "epoch": 0.08, "learning_rate": 4.9408284023668644e-05, "loss": 0.3459, "step": 98, "task_loss": 0.640835165977478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17166396975517273, "epoch": 0.08, "learning_rate": 4.940224610554281e-05, "loss": 0.2638, "step": 99, "task_loss": 0.4222128987312317 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5104278922080994, "epoch": 0.08, "learning_rate": 4.939620818741698e-05, "loss": 0.4602, "step": 100, "task_loss": 1.2593507766723633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3089989423751831, "epoch": 0.09, "learning_rate": 4.939017026929115e-05, "loss": 0.3225, "step": 101, "task_loss": 0.26729482412338257 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43301767110824585, "epoch": 0.09, "learning_rate": 4.938413235116532e-05, "loss": 0.3929, "step": 102, "task_loss": 1.302262783050537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4287503659725189, "epoch": 0.09, "learning_rate": 4.937809443303949e-05, "loss": 0.485, "step": 103, "task_loss": 0.9651143550872803 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5477359890937805, "epoch": 0.09, "learning_rate": 4.937205651491366e-05, "loss": 0.4382, "step": 104, "task_loss": 1.432396650314331 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5919656157493591, "epoch": 0.09, "learning_rate": 4.936601859678783e-05, "loss": 0.343, "step": 105, "task_loss": 0.8800464272499084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24281111359596252, "epoch": 0.09, "learning_rate": 4.9359980678662e-05, "loss": 0.3865, "step": 106, "task_loss": 0.5352722406387329 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2820776402950287, "epoch": 0.09, "learning_rate": 4.935394276053617e-05, "loss": 0.4199, "step": 107, "task_loss": 0.9807997345924377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22061243653297424, "epoch": 0.09, "learning_rate": 4.934790484241034e-05, "loss": 0.3277, "step": 108, "task_loss": 0.15575382113456726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4099520742893219, "epoch": 0.09, "learning_rate": 4.934186692428451e-05, "loss": 0.3114, "step": 109, "task_loss": 0.48929744958877563 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24980920553207397, "epoch": 0.09, "learning_rate": 4.933582900615868e-05, "loss": 0.3978, "step": 110, "task_loss": 0.2342258244752884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35572949051856995, "epoch": 0.09, "learning_rate": 4.932979108803285e-05, "loss": 0.4319, "step": 111, "task_loss": 0.882225513458252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20163123309612274, "epoch": 0.09, "learning_rate": 4.932375316990702e-05, "loss": 0.3542, "step": 112, "task_loss": 0.024291040375828743 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33488476276397705, "epoch": 0.1, "learning_rate": 4.9317715251781185e-05, "loss": 0.2929, "step": 113, "task_loss": 0.6396358013153076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23511746525764465, "epoch": 0.1, "learning_rate": 4.931167733365536e-05, "loss": 0.3756, "step": 114, "task_loss": 0.3878803253173828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47338762879371643, "epoch": 0.1, "learning_rate": 4.9305639415529527e-05, "loss": 0.3755, "step": 115, "task_loss": 0.3775898814201355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.338032603263855, "epoch": 0.1, "learning_rate": 4.9299601497403694e-05, "loss": 0.4114, "step": 116, "task_loss": 0.4424063563346863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6006137728691101, "epoch": 0.1, "learning_rate": 4.929356357927787e-05, "loss": 0.441, "step": 117, "task_loss": 0.7305097579956055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5968002080917358, "epoch": 0.1, "learning_rate": 4.928752566115204e-05, "loss": 0.4182, "step": 118, "task_loss": 0.6539137959480286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2060955911874771, "epoch": 0.1, "learning_rate": 4.928148774302621e-05, "loss": 0.4359, "step": 119, "task_loss": 0.23030970990657806 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41826102137565613, "epoch": 0.1, "learning_rate": 4.9275449824900376e-05, "loss": 0.5222, "step": 120, "task_loss": 1.048397183418274 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45675361156463623, "epoch": 0.1, "learning_rate": 4.926941190677455e-05, "loss": 0.4799, "step": 121, "task_loss": 0.18881812691688538 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1543813943862915, "epoch": 0.1, "learning_rate": 4.926337398864872e-05, "loss": 0.2702, "step": 122, "task_loss": 0.5565088391304016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22674205899238586, "epoch": 0.1, "learning_rate": 4.9257336070522884e-05, "loss": 0.2697, "step": 123, "task_loss": 0.8856381773948669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44395679235458374, "epoch": 0.1, "learning_rate": 4.925129815239706e-05, "loss": 0.3386, "step": 124, "task_loss": 0.6910078525543213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6182366609573364, "epoch": 0.11, "learning_rate": 4.9245260234271226e-05, "loss": 0.3977, "step": 125, "task_loss": 0.36996322870254517 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3325819969177246, "epoch": 0.11, "learning_rate": 4.923922231614539e-05, "loss": 0.3033, "step": 126, "task_loss": 1.2850160598754883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38252389430999756, "epoch": 0.11, "learning_rate": 4.923318439801957e-05, "loss": 0.3852, "step": 127, "task_loss": 0.33823931217193604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3019288182258606, "epoch": 0.11, "learning_rate": 4.922714647989374e-05, "loss": 0.4403, "step": 128, "task_loss": 2.0719704627990723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3690018951892853, "epoch": 0.11, "learning_rate": 4.92211085617679e-05, "loss": 0.4573, "step": 129, "task_loss": 1.5507882833480835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4736906886100769, "epoch": 0.11, "learning_rate": 4.9215070643642075e-05, "loss": 0.2867, "step": 130, "task_loss": 0.25044187903404236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30232638120651245, "epoch": 0.11, "learning_rate": 4.920903272551625e-05, "loss": 0.3516, "step": 131, "task_loss": 1.6450247764587402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1854860931634903, "epoch": 0.11, "learning_rate": 4.920299480739041e-05, "loss": 0.3058, "step": 132, "task_loss": 0.06923609972000122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.407135933637619, "epoch": 0.11, "learning_rate": 4.919695688926458e-05, "loss": 0.4332, "step": 133, "task_loss": 1.0303617715835571 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37842056155204773, "epoch": 0.11, "learning_rate": 4.919091897113876e-05, "loss": 0.4169, "step": 134, "task_loss": 1.5742859840393066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.11493265628814697, "epoch": 0.11, "learning_rate": 4.9184881053012924e-05, "loss": 0.2742, "step": 135, "task_loss": 0.3966679275035858 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5346572995185852, "epoch": 0.11, "learning_rate": 4.917884313488709e-05, "loss": 0.4029, "step": 136, "task_loss": 0.9560654163360596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24311745166778564, "epoch": 0.12, "learning_rate": 4.9172805216761266e-05, "loss": 0.4293, "step": 137, "task_loss": 0.11503170430660248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23056933283805847, "epoch": 0.12, "learning_rate": 4.916676729863543e-05, "loss": 0.3317, "step": 138, "task_loss": 0.5454956889152527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2211146354675293, "epoch": 0.12, "learning_rate": 4.91607293805096e-05, "loss": 0.2856, "step": 139, "task_loss": 0.07383111864328384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40937942266464233, "epoch": 0.12, "learning_rate": 4.9154691462383774e-05, "loss": 0.3256, "step": 140, "task_loss": 0.9068475365638733 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23422488570213318, "epoch": 0.12, "learning_rate": 4.914865354425794e-05, "loss": 0.3743, "step": 141, "task_loss": 0.772557258605957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3856215476989746, "epoch": 0.12, "learning_rate": 4.914261562613211e-05, "loss": 0.3121, "step": 142, "task_loss": 1.003980278968811 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2954632639884949, "epoch": 0.12, "learning_rate": 4.913657770800628e-05, "loss": 0.2943, "step": 143, "task_loss": 1.9202251434326172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17213040590286255, "epoch": 0.12, "learning_rate": 4.9130539789880456e-05, "loss": 0.2983, "step": 144, "task_loss": 1.1155539751052856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33316195011138916, "epoch": 0.12, "learning_rate": 4.912450187175462e-05, "loss": 0.3475, "step": 145, "task_loss": 0.5890751481056213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43856483697891235, "epoch": 0.12, "learning_rate": 4.911846395362879e-05, "loss": 0.3749, "step": 146, "task_loss": 1.2088651657104492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3035190999507904, "epoch": 0.12, "learning_rate": 4.9112426035502965e-05, "loss": 0.3782, "step": 147, "task_loss": 0.5619529485702515 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29764872789382935, "epoch": 0.13, "learning_rate": 4.910638811737713e-05, "loss": 0.3253, "step": 148, "task_loss": 0.8805117011070251 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22491507232189178, "epoch": 0.13, "learning_rate": 4.91003501992513e-05, "loss": 0.4551, "step": 149, "task_loss": 0.2589697241783142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4931153357028961, "epoch": 0.13, "learning_rate": 4.909431228112547e-05, "loss": 0.4107, "step": 150, "task_loss": 0.5675384998321533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4110710620880127, "epoch": 0.13, "learning_rate": 4.908827436299964e-05, "loss": 0.4342, "step": 151, "task_loss": 0.7999531030654907 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34637752175331116, "epoch": 0.13, "learning_rate": 4.908223644487381e-05, "loss": 0.4092, "step": 152, "task_loss": 0.38146457076072693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30415773391723633, "epoch": 0.13, "learning_rate": 4.907619852674798e-05, "loss": 0.4607, "step": 153, "task_loss": 0.6748791933059692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6208047866821289, "epoch": 0.13, "learning_rate": 4.907016060862215e-05, "loss": 0.4497, "step": 154, "task_loss": 0.5220504403114319 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3292478322982788, "epoch": 0.13, "learning_rate": 4.9064122690496316e-05, "loss": 0.4716, "step": 155, "task_loss": 0.5408774018287659 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3978666663169861, "epoch": 0.13, "learning_rate": 4.905808477237049e-05, "loss": 0.3863, "step": 156, "task_loss": 0.3390129506587982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34065455198287964, "epoch": 0.13, "learning_rate": 4.905204685424466e-05, "loss": 0.3771, "step": 157, "task_loss": 0.7971680164337158 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5941371917724609, "epoch": 0.13, "learning_rate": 4.904600893611883e-05, "loss": 0.5904, "step": 158, "task_loss": 0.24042095243930817 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25876864790916443, "epoch": 0.13, "learning_rate": 4.9039971017993e-05, "loss": 0.4258, "step": 159, "task_loss": 0.19387097656726837 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16864356398582458, "epoch": 0.14, "learning_rate": 4.903393309986717e-05, "loss": 0.2852, "step": 160, "task_loss": 0.6018144488334656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5536223649978638, "epoch": 0.14, "learning_rate": 4.902789518174134e-05, "loss": 0.3592, "step": 161, "task_loss": 0.5275382399559021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2101719081401825, "epoch": 0.14, "learning_rate": 4.9021857263615506e-05, "loss": 0.3061, "step": 162, "task_loss": 0.015712212771177292 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15919873118400574, "epoch": 0.14, "learning_rate": 4.901581934548968e-05, "loss": 0.2562, "step": 163, "task_loss": 0.5559214949607849 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32795944809913635, "epoch": 0.14, "learning_rate": 4.900978142736385e-05, "loss": 0.3572, "step": 164, "task_loss": 0.9139054417610168 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2968144118785858, "epoch": 0.14, "learning_rate": 4.9003743509238014e-05, "loss": 0.3407, "step": 165, "task_loss": 0.8456998467445374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4627693295478821, "epoch": 0.14, "learning_rate": 4.899770559111219e-05, "loss": 0.5055, "step": 166, "task_loss": 0.9660850167274475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3389841616153717, "epoch": 0.14, "learning_rate": 4.8991667672986356e-05, "loss": 0.3668, "step": 167, "task_loss": 1.301005482673645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17484796047210693, "epoch": 0.14, "learning_rate": 4.898562975486053e-05, "loss": 0.3587, "step": 168, "task_loss": 0.0901103988289833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2166406214237213, "epoch": 0.14, "learning_rate": 4.89795918367347e-05, "loss": 0.2322, "step": 169, "task_loss": 0.026940368115901947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20582215487957, "epoch": 0.14, "learning_rate": 4.8973553918608864e-05, "loss": 0.2233, "step": 170, "task_loss": 0.2768481373786926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4102306067943573, "epoch": 0.14, "learning_rate": 4.896751600048304e-05, "loss": 0.2917, "step": 171, "task_loss": 0.7136456966400146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17633290588855743, "epoch": 0.15, "learning_rate": 4.8961478082357205e-05, "loss": 0.2741, "step": 172, "task_loss": 0.5400669574737549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37040984630584717, "epoch": 0.15, "learning_rate": 4.895544016423137e-05, "loss": 0.3682, "step": 173, "task_loss": 0.5297955870628357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36008045077323914, "epoch": 0.15, "learning_rate": 4.8949402246105546e-05, "loss": 0.3865, "step": 174, "task_loss": 1.293094515800476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3012641966342926, "epoch": 0.15, "learning_rate": 4.8943364327979713e-05, "loss": 0.3107, "step": 175, "task_loss": 0.6071553826332092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16874560713768005, "epoch": 0.15, "learning_rate": 4.893732640985389e-05, "loss": 0.3568, "step": 176, "task_loss": 0.48791053891181946 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.305328905582428, "epoch": 0.15, "learning_rate": 4.8931288491728055e-05, "loss": 0.3334, "step": 177, "task_loss": 0.5327244997024536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19686727225780487, "epoch": 0.15, "learning_rate": 4.892525057360222e-05, "loss": 0.2972, "step": 178, "task_loss": 0.7281004190444946 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31708723306655884, "epoch": 0.15, "learning_rate": 4.8919212655476396e-05, "loss": 0.3641, "step": 179, "task_loss": 0.4216811954975128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35918673872947693, "epoch": 0.15, "learning_rate": 4.891317473735056e-05, "loss": 0.3884, "step": 180, "task_loss": 0.689250111579895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25123131275177, "epoch": 0.15, "learning_rate": 4.890713681922474e-05, "loss": 0.356, "step": 181, "task_loss": 0.5716760158538818 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34370747208595276, "epoch": 0.15, "learning_rate": 4.8901098901098904e-05, "loss": 0.338, "step": 182, "task_loss": 0.8569716811180115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19864091277122498, "epoch": 0.15, "learning_rate": 4.889506098297307e-05, "loss": 0.3252, "step": 183, "task_loss": 0.659917950630188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31270259618759155, "epoch": 0.16, "learning_rate": 4.8889023064847245e-05, "loss": 0.3916, "step": 184, "task_loss": 1.4612730741500854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37377360463142395, "epoch": 0.16, "learning_rate": 4.888298514672141e-05, "loss": 0.5808, "step": 185, "task_loss": 0.7126050591468811 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29406899213790894, "epoch": 0.16, "learning_rate": 4.887694722859558e-05, "loss": 0.4167, "step": 186, "task_loss": 1.1095341444015503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1254437267780304, "epoch": 0.16, "learning_rate": 4.8870909310469754e-05, "loss": 0.2659, "step": 187, "task_loss": 0.3575878441333771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4643031358718872, "epoch": 0.16, "learning_rate": 4.886487139234392e-05, "loss": 0.4302, "step": 188, "task_loss": 0.6942890286445618 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5741887092590332, "epoch": 0.16, "learning_rate": 4.885883347421809e-05, "loss": 0.3795, "step": 189, "task_loss": 1.2970455884933472 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24287299811840057, "epoch": 0.16, "learning_rate": 4.885279555609226e-05, "loss": 0.2826, "step": 190, "task_loss": 0.585664689540863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1484246850013733, "epoch": 0.16, "learning_rate": 4.8846757637966436e-05, "loss": 0.2874, "step": 191, "task_loss": 0.05447469651699066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43349575996398926, "epoch": 0.16, "learning_rate": 4.88407197198406e-05, "loss": 0.2905, "step": 192, "task_loss": 0.7433083057403564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3356369733810425, "epoch": 0.16, "learning_rate": 4.883468180171477e-05, "loss": 0.404, "step": 193, "task_loss": 0.25878268480300903 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44204750657081604, "epoch": 0.16, "learning_rate": 4.8828643883588944e-05, "loss": 0.3698, "step": 194, "task_loss": 0.44937726855278015 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5335066914558411, "epoch": 0.16, "learning_rate": 4.882260596546311e-05, "loss": 0.3701, "step": 195, "task_loss": 0.5461884140968323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19670599699020386, "epoch": 0.17, "learning_rate": 4.881656804733728e-05, "loss": 0.2998, "step": 196, "task_loss": 0.8536654710769653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7039417028427124, "epoch": 0.17, "learning_rate": 4.881053012921145e-05, "loss": 0.4345, "step": 197, "task_loss": 0.5822523236274719 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43034815788269043, "epoch": 0.17, "learning_rate": 4.880449221108562e-05, "loss": 0.4119, "step": 198, "task_loss": 0.6032153964042664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2675672769546509, "epoch": 0.17, "learning_rate": 4.879845429295979e-05, "loss": 0.4151, "step": 199, "task_loss": 0.6427316665649414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5726067423820496, "epoch": 0.17, "learning_rate": 4.879241637483396e-05, "loss": 0.4373, "step": 200, "task_loss": 1.5375341176986694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3098316490650177, "epoch": 0.17, "learning_rate": 4.8786378456708135e-05, "loss": 0.2882, "step": 201, "task_loss": 0.7666797637939453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37163761258125305, "epoch": 0.17, "learning_rate": 4.8780340538582295e-05, "loss": 0.3448, "step": 202, "task_loss": 0.5320181846618652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16711682081222534, "epoch": 0.17, "learning_rate": 4.877430262045647e-05, "loss": 0.3622, "step": 203, "task_loss": 0.19219565391540527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2333192229270935, "epoch": 0.17, "learning_rate": 4.876826470233064e-05, "loss": 0.3458, "step": 204, "task_loss": 0.5413380861282349 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.573334813117981, "epoch": 0.17, "learning_rate": 4.8762226784204803e-05, "loss": 0.3731, "step": 205, "task_loss": 0.18013392388820648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17017324268817902, "epoch": 0.17, "learning_rate": 4.875618886607898e-05, "loss": 0.337, "step": 206, "task_loss": 0.2642764151096344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3357846438884735, "epoch": 0.17, "learning_rate": 4.875015094795315e-05, "loss": 0.3443, "step": 207, "task_loss": 0.8511983156204224 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2904583811759949, "epoch": 0.18, "learning_rate": 4.874411302982732e-05, "loss": 0.3644, "step": 208, "task_loss": 0.6950564384460449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6159971356391907, "epoch": 0.18, "learning_rate": 4.8738075111701486e-05, "loss": 0.417, "step": 209, "task_loss": 1.0051993131637573 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2751395106315613, "epoch": 0.18, "learning_rate": 4.873203719357566e-05, "loss": 0.259, "step": 210, "task_loss": 0.44548681378364563 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14952373504638672, "epoch": 0.18, "learning_rate": 4.872599927544983e-05, "loss": 0.3419, "step": 211, "task_loss": 0.2996128797531128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.240716353058815, "epoch": 0.18, "learning_rate": 4.8719961357323994e-05, "loss": 0.3574, "step": 212, "task_loss": 0.5914297699928284 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3645804524421692, "epoch": 0.18, "learning_rate": 4.871392343919817e-05, "loss": 0.3481, "step": 213, "task_loss": 0.07440241426229477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3713075816631317, "epoch": 0.18, "learning_rate": 4.8707885521072335e-05, "loss": 0.4508, "step": 214, "task_loss": 1.505163550376892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4446750581264496, "epoch": 0.18, "learning_rate": 4.87018476029465e-05, "loss": 0.4169, "step": 215, "task_loss": 0.6080370545387268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42608070373535156, "epoch": 0.18, "learning_rate": 4.8695809684820676e-05, "loss": 0.34, "step": 216, "task_loss": 1.165233850479126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44962722063064575, "epoch": 0.18, "learning_rate": 4.868977176669485e-05, "loss": 0.4527, "step": 217, "task_loss": 1.359510898590088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.347635954618454, "epoch": 0.18, "learning_rate": 4.868373384856901e-05, "loss": 0.463, "step": 218, "task_loss": 0.4809846580028534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24371974170207977, "epoch": 0.19, "learning_rate": 4.8677695930443185e-05, "loss": 0.3572, "step": 219, "task_loss": 0.7009860277175903 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25305095314979553, "epoch": 0.19, "learning_rate": 4.867165801231736e-05, "loss": 0.3178, "step": 220, "task_loss": 0.6486716866493225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21840791404247284, "epoch": 0.19, "learning_rate": 4.8665620094191526e-05, "loss": 0.2503, "step": 221, "task_loss": 0.2857961356639862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22764679789543152, "epoch": 0.19, "learning_rate": 4.865958217606569e-05, "loss": 0.2923, "step": 222, "task_loss": 1.1480791568756104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6173543334007263, "epoch": 0.19, "learning_rate": 4.865354425793987e-05, "loss": 0.441, "step": 223, "task_loss": 0.7470206618309021 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.557291567325592, "epoch": 0.19, "learning_rate": 4.8647506339814034e-05, "loss": 0.4489, "step": 224, "task_loss": 0.689177930355072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42682474851608276, "epoch": 0.19, "learning_rate": 4.86414684216882e-05, "loss": 0.3617, "step": 225, "task_loss": 0.6550009250640869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1677248775959015, "epoch": 0.19, "learning_rate": 4.8635430503562375e-05, "loss": 0.2983, "step": 226, "task_loss": 0.2665528357028961 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3583166003227234, "epoch": 0.19, "learning_rate": 4.862939258543654e-05, "loss": 0.4007, "step": 227, "task_loss": 0.4113485515117645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22209115326404572, "epoch": 0.19, "learning_rate": 4.862335466731071e-05, "loss": 0.4767, "step": 228, "task_loss": 1.2220337390899658 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.591152012348175, "epoch": 0.19, "learning_rate": 4.8617316749184884e-05, "loss": 0.4903, "step": 229, "task_loss": 1.7407927513122559 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29130685329437256, "epoch": 0.19, "learning_rate": 4.861127883105905e-05, "loss": 0.3066, "step": 230, "task_loss": 0.8779850602149963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27365842461586, "epoch": 0.2, "learning_rate": 4.8605240912933225e-05, "loss": 0.3202, "step": 231, "task_loss": 0.9441163539886475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2616477906703949, "epoch": 0.2, "learning_rate": 4.859920299480739e-05, "loss": 0.3274, "step": 232, "task_loss": 0.8562521934509277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3808269798755646, "epoch": 0.2, "learning_rate": 4.8593165076681566e-05, "loss": 0.2633, "step": 233, "task_loss": 0.5619776248931885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28243935108184814, "epoch": 0.2, "learning_rate": 4.858712715855573e-05, "loss": 0.3988, "step": 234, "task_loss": 0.32802894711494446 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33323073387145996, "epoch": 0.2, "learning_rate": 4.85810892404299e-05, "loss": 0.3971, "step": 235, "task_loss": 0.6267164349555969 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21033337712287903, "epoch": 0.2, "learning_rate": 4.8575051322304074e-05, "loss": 0.2833, "step": 236, "task_loss": 0.4125954806804657 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5221607685089111, "epoch": 0.2, "learning_rate": 4.856901340417824e-05, "loss": 0.5004, "step": 237, "task_loss": 1.5617070198059082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.198834627866745, "epoch": 0.2, "learning_rate": 4.856297548605241e-05, "loss": 0.5516, "step": 238, "task_loss": 1.176544189453125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3844684958457947, "epoch": 0.2, "learning_rate": 4.855693756792658e-05, "loss": 0.4421, "step": 239, "task_loss": 0.7043779492378235 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27848494052886963, "epoch": 0.2, "learning_rate": 4.855089964980075e-05, "loss": 0.2971, "step": 240, "task_loss": 0.2784903347492218 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7233941555023193, "epoch": 0.2, "learning_rate": 4.8544861731674924e-05, "loss": 0.4211, "step": 241, "task_loss": 0.6842274069786072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2772015631198883, "epoch": 0.2, "learning_rate": 4.853882381354909e-05, "loss": 0.3696, "step": 242, "task_loss": 0.4753953516483307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28759765625, "epoch": 0.21, "learning_rate": 4.853278589542326e-05, "loss": 0.3984, "step": 243, "task_loss": 0.3085748553276062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45128703117370605, "epoch": 0.21, "learning_rate": 4.852674797729743e-05, "loss": 0.3283, "step": 244, "task_loss": 0.7327007055282593 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36166709661483765, "epoch": 0.21, "learning_rate": 4.85207100591716e-05, "loss": 0.4065, "step": 245, "task_loss": 0.17605814337730408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25296056270599365, "epoch": 0.21, "learning_rate": 4.8514672141045766e-05, "loss": 0.3166, "step": 246, "task_loss": 0.524057924747467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3108903765678406, "epoch": 0.21, "learning_rate": 4.850863422291994e-05, "loss": 0.3726, "step": 247, "task_loss": 0.6622505187988281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2930804491043091, "epoch": 0.21, "learning_rate": 4.850259630479411e-05, "loss": 0.3462, "step": 248, "task_loss": 0.2555572986602783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47002166509628296, "epoch": 0.21, "learning_rate": 4.849655838666828e-05, "loss": 0.388, "step": 249, "task_loss": 0.3438532054424286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5938379168510437, "epoch": 0.21, "learning_rate": 4.849052046854245e-05, "loss": 0.3176, "step": 250, "task_loss": 0.28426429629325867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4488550126552582, "epoch": 0.21, "learning_rate": 4.848448255041662e-05, "loss": 0.3943, "step": 251, "task_loss": 1.217559814453125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21565239131450653, "epoch": 0.21, "learning_rate": 4.847844463229079e-05, "loss": 0.367, "step": 252, "task_loss": 0.8230108022689819 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20017766952514648, "epoch": 0.21, "learning_rate": 4.847240671416496e-05, "loss": 0.4238, "step": 253, "task_loss": 0.6060188412666321 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37959274649620056, "epoch": 0.21, "learning_rate": 4.846636879603913e-05, "loss": 0.305, "step": 254, "task_loss": 0.9876610040664673 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39832544326782227, "epoch": 0.22, "learning_rate": 4.84603308779133e-05, "loss": 0.2947, "step": 255, "task_loss": 0.5021925568580627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3502846360206604, "epoch": 0.22, "learning_rate": 4.8454292959787465e-05, "loss": 0.3081, "step": 256, "task_loss": 0.6578946709632874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19270899891853333, "epoch": 0.22, "learning_rate": 4.844825504166164e-05, "loss": 0.2989, "step": 257, "task_loss": 0.5173934698104858 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3227487802505493, "epoch": 0.22, "learning_rate": 4.8442217123535806e-05, "loss": 0.4644, "step": 258, "task_loss": 0.6536293625831604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5462640523910522, "epoch": 0.22, "learning_rate": 4.8436179205409974e-05, "loss": 0.3752, "step": 259, "task_loss": 1.1624946594238281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3551231026649475, "epoch": 0.22, "learning_rate": 4.843014128728415e-05, "loss": 0.3021, "step": 260, "task_loss": 0.11768197268247604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28162717819213867, "epoch": 0.22, "learning_rate": 4.842410336915832e-05, "loss": 0.3605, "step": 261, "task_loss": 1.2060271501541138 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2618270516395569, "epoch": 0.22, "learning_rate": 4.841806545103248e-05, "loss": 0.4038, "step": 262, "task_loss": 0.6179669499397278 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5169287323951721, "epoch": 0.22, "learning_rate": 4.8412027532906656e-05, "loss": 0.4219, "step": 263, "task_loss": 0.6710100173950195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3042276203632355, "epoch": 0.22, "learning_rate": 4.840598961478083e-05, "loss": 0.3309, "step": 264, "task_loss": 0.4049620032310486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4004916548728943, "epoch": 0.22, "learning_rate": 4.8399951696655e-05, "loss": 0.3626, "step": 265, "task_loss": 0.7619703412055969 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18103118240833282, "epoch": 0.22, "learning_rate": 4.8393913778529164e-05, "loss": 0.3102, "step": 266, "task_loss": 0.12635083496570587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3955288231372833, "epoch": 0.23, "learning_rate": 4.838787586040334e-05, "loss": 0.3845, "step": 267, "task_loss": 0.8333910703659058 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3358483910560608, "epoch": 0.23, "learning_rate": 4.8381837942277505e-05, "loss": 0.3969, "step": 268, "task_loss": 0.7327327728271484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6183207035064697, "epoch": 0.23, "learning_rate": 4.837580002415167e-05, "loss": 0.4966, "step": 269, "task_loss": 0.7672142386436462 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4284032881259918, "epoch": 0.23, "learning_rate": 4.8369762106025847e-05, "loss": 0.3588, "step": 270, "task_loss": 0.38554489612579346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.298435240983963, "epoch": 0.23, "learning_rate": 4.8363724187900014e-05, "loss": 0.3192, "step": 271, "task_loss": 0.8453357219696045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18604081869125366, "epoch": 0.23, "learning_rate": 4.835768626977418e-05, "loss": 0.4136, "step": 272, "task_loss": 0.14790509641170502 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1609693467617035, "epoch": 0.23, "learning_rate": 4.8351648351648355e-05, "loss": 0.2793, "step": 273, "task_loss": 0.21414852142333984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21765820682048798, "epoch": 0.23, "learning_rate": 4.834561043352253e-05, "loss": 0.2814, "step": 274, "task_loss": 0.38039907813072205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1675441861152649, "epoch": 0.23, "learning_rate": 4.833957251539669e-05, "loss": 0.2665, "step": 275, "task_loss": 0.04698711261153221 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4158642888069153, "epoch": 0.23, "learning_rate": 4.833353459727086e-05, "loss": 0.4288, "step": 276, "task_loss": 0.3804851770401001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19586452841758728, "epoch": 0.23, "learning_rate": 4.832749667914504e-05, "loss": 0.3039, "step": 277, "task_loss": 0.12271113693714142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4234282672405243, "epoch": 0.23, "learning_rate": 4.83214587610192e-05, "loss": 0.3037, "step": 278, "task_loss": 0.3277617394924164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3377179503440857, "epoch": 0.24, "learning_rate": 4.831542084289337e-05, "loss": 0.3565, "step": 279, "task_loss": 0.6190507411956787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3592570126056671, "epoch": 0.24, "learning_rate": 4.8309382924767545e-05, "loss": 0.3361, "step": 280, "task_loss": 0.5719615817070007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2319561243057251, "epoch": 0.24, "learning_rate": 4.830334500664171e-05, "loss": 0.3792, "step": 281, "task_loss": 0.5975136756896973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36890077590942383, "epoch": 0.24, "learning_rate": 4.829730708851588e-05, "loss": 0.3507, "step": 282, "task_loss": 0.7881508469581604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30688631534576416, "epoch": 0.24, "learning_rate": 4.8291269170390054e-05, "loss": 0.4851, "step": 283, "task_loss": 1.1022228002548218 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2913910448551178, "epoch": 0.24, "learning_rate": 4.828523125226422e-05, "loss": 0.2951, "step": 284, "task_loss": 0.5692667365074158 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3001357913017273, "epoch": 0.24, "learning_rate": 4.827919333413839e-05, "loss": 0.316, "step": 285, "task_loss": 0.5103100538253784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19008879363536835, "epoch": 0.24, "learning_rate": 4.827315541601256e-05, "loss": 0.332, "step": 286, "task_loss": 0.3047529458999634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3289864957332611, "epoch": 0.24, "learning_rate": 4.826711749788673e-05, "loss": 0.3928, "step": 287, "task_loss": 0.48146453499794006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8252094388008118, "epoch": 0.24, "learning_rate": 4.8261079579760896e-05, "loss": 0.4665, "step": 288, "task_loss": 0.42987194657325745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3161754608154297, "epoch": 0.24, "learning_rate": 4.825504166163507e-05, "loss": 0.3388, "step": 289, "task_loss": 1.4513256549835205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16933982074260712, "epoch": 0.24, "learning_rate": 4.8249003743509244e-05, "loss": 0.2714, "step": 290, "task_loss": 0.8262923359870911 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37988537549972534, "epoch": 0.25, "learning_rate": 4.824296582538341e-05, "loss": 0.3737, "step": 291, "task_loss": 0.469099760055542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25396668910980225, "epoch": 0.25, "learning_rate": 4.823692790725758e-05, "loss": 0.3128, "step": 292, "task_loss": 0.22486521303653717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4176521599292755, "epoch": 0.25, "learning_rate": 4.823088998913175e-05, "loss": 0.3963, "step": 293, "task_loss": 0.48993009328842163 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34238067269325256, "epoch": 0.25, "learning_rate": 4.822485207100592e-05, "loss": 0.3931, "step": 294, "task_loss": 0.6426882743835449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19826723635196686, "epoch": 0.25, "learning_rate": 4.821881415288009e-05, "loss": 0.3186, "step": 295, "task_loss": 0.23027630150318146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43875911831855774, "epoch": 0.25, "learning_rate": 4.821277623475426e-05, "loss": 0.4149, "step": 296, "task_loss": 1.1842646598815918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1716620773077011, "epoch": 0.25, "learning_rate": 4.820673831662843e-05, "loss": 0.3365, "step": 297, "task_loss": 0.5584362745285034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4358714520931244, "epoch": 0.25, "learning_rate": 4.8200700398502595e-05, "loss": 0.3862, "step": 298, "task_loss": 0.6126980185508728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4216727614402771, "epoch": 0.25, "learning_rate": 4.819466248037677e-05, "loss": 0.3903, "step": 299, "task_loss": 0.5612708330154419 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46830129623413086, "epoch": 0.25, "learning_rate": 4.8188624562250937e-05, "loss": 0.424, "step": 300, "task_loss": 0.6126656532287598 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3437400162220001, "epoch": 0.25, "learning_rate": 4.818258664412511e-05, "loss": 0.3715, "step": 301, "task_loss": 0.2198963165283203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3458477258682251, "epoch": 0.26, "learning_rate": 4.817654872599928e-05, "loss": 0.3308, "step": 302, "task_loss": 0.6421058773994446 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3061091899871826, "epoch": 0.26, "learning_rate": 4.8170510807873445e-05, "loss": 0.3606, "step": 303, "task_loss": 0.48531925678253174 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5407686233520508, "epoch": 0.26, "learning_rate": 4.816447288974762e-05, "loss": 0.3952, "step": 304, "task_loss": 1.4983210563659668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5865715146064758, "epoch": 0.26, "learning_rate": 4.8158434971621786e-05, "loss": 0.3531, "step": 305, "task_loss": 1.0585517883300781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15689745545387268, "epoch": 0.26, "learning_rate": 4.815239705349596e-05, "loss": 0.319, "step": 306, "task_loss": 0.05530761554837227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3637600243091583, "epoch": 0.26, "learning_rate": 4.814635913537013e-05, "loss": 0.2908, "step": 307, "task_loss": 1.0135647058486938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3493724465370178, "epoch": 0.26, "learning_rate": 4.8140321217244294e-05, "loss": 0.3799, "step": 308, "task_loss": 0.9135453104972839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.398184597492218, "epoch": 0.26, "learning_rate": 4.813428329911847e-05, "loss": 0.2983, "step": 309, "task_loss": 0.31708386540412903 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30986547470092773, "epoch": 0.26, "learning_rate": 4.8128245380992635e-05, "loss": 0.4664, "step": 310, "task_loss": 1.2336593866348267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20752136409282684, "epoch": 0.26, "learning_rate": 4.812220746286681e-05, "loss": 0.3982, "step": 311, "task_loss": 0.19107981026172638 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20283068716526031, "epoch": 0.26, "learning_rate": 4.811616954474098e-05, "loss": 0.2838, "step": 312, "task_loss": 0.1679132580757141 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3719440698623657, "epoch": 0.26, "learning_rate": 4.8110131626615144e-05, "loss": 0.2413, "step": 313, "task_loss": 0.5856820344924927 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24177727103233337, "epoch": 0.27, "learning_rate": 4.810409370848932e-05, "loss": 0.3381, "step": 314, "task_loss": 0.473675400018692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30009961128234863, "epoch": 0.27, "learning_rate": 4.8098055790363485e-05, "loss": 0.3153, "step": 315, "task_loss": 0.6971793174743652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18764623999595642, "epoch": 0.27, "learning_rate": 4.809201787223765e-05, "loss": 0.3125, "step": 316, "task_loss": 0.21867595613002777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4035642743110657, "epoch": 0.27, "learning_rate": 4.8085979954111826e-05, "loss": 0.4733, "step": 317, "task_loss": 1.023959755897522 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20800375938415527, "epoch": 0.27, "learning_rate": 4.807994203598599e-05, "loss": 0.3641, "step": 318, "task_loss": 0.5931673645973206 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3392481207847595, "epoch": 0.27, "learning_rate": 4.807390411786016e-05, "loss": 0.384, "step": 319, "task_loss": 0.5908573269844055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2010572850704193, "epoch": 0.27, "learning_rate": 4.8067866199734334e-05, "loss": 0.2327, "step": 320, "task_loss": 0.5935060977935791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20867067575454712, "epoch": 0.27, "learning_rate": 4.806182828160851e-05, "loss": 0.4388, "step": 321, "task_loss": 1.138772964477539 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21708956360816956, "epoch": 0.27, "learning_rate": 4.8055790363482676e-05, "loss": 0.3001, "step": 322, "task_loss": 0.6124125123023987 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4039044678211212, "epoch": 0.27, "learning_rate": 4.804975244535684e-05, "loss": 0.3187, "step": 323, "task_loss": 1.301652431488037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29664894938468933, "epoch": 0.27, "learning_rate": 4.804371452723102e-05, "loss": 0.4175, "step": 324, "task_loss": 0.5054799914360046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25232619047164917, "epoch": 0.27, "learning_rate": 4.8037676609105184e-05, "loss": 0.45, "step": 325, "task_loss": 0.25673484802246094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24607592821121216, "epoch": 0.28, "learning_rate": 4.803163869097935e-05, "loss": 0.2717, "step": 326, "task_loss": 0.6230039596557617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32535046339035034, "epoch": 0.28, "learning_rate": 4.8025600772853525e-05, "loss": 0.3099, "step": 327, "task_loss": 0.92620849609375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2036980390548706, "epoch": 0.28, "learning_rate": 4.801956285472769e-05, "loss": 0.2848, "step": 328, "task_loss": 0.4006662368774414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30950790643692017, "epoch": 0.28, "learning_rate": 4.801352493660186e-05, "loss": 0.4279, "step": 329, "task_loss": 0.5127992033958435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49542707204818726, "epoch": 0.28, "learning_rate": 4.800748701847603e-05, "loss": 0.4268, "step": 330, "task_loss": 0.32095232605934143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2551426589488983, "epoch": 0.28, "learning_rate": 4.800144910035021e-05, "loss": 0.3823, "step": 331, "task_loss": 0.404095321893692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3106461763381958, "epoch": 0.28, "learning_rate": 4.799541118222437e-05, "loss": 0.2702, "step": 332, "task_loss": 0.36216187477111816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20378345251083374, "epoch": 0.28, "learning_rate": 4.798937326409854e-05, "loss": 0.2835, "step": 333, "task_loss": 0.2527221441268921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24155229330062866, "epoch": 0.28, "learning_rate": 4.7983335345972716e-05, "loss": 0.3078, "step": 334, "task_loss": 0.6740986108779907 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2674604058265686, "epoch": 0.28, "learning_rate": 4.7977297427846876e-05, "loss": 0.2188, "step": 335, "task_loss": 0.6719998121261597 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18219339847564697, "epoch": 0.28, "learning_rate": 4.797125950972105e-05, "loss": 0.2772, "step": 336, "task_loss": 0.08983666449785233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2661128044128418, "epoch": 0.28, "learning_rate": 4.7965221591595224e-05, "loss": 0.3587, "step": 337, "task_loss": 0.9328497648239136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4267786145210266, "epoch": 0.29, "learning_rate": 4.795918367346939e-05, "loss": 0.4127, "step": 338, "task_loss": 0.7519228458404541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33033639192581177, "epoch": 0.29, "learning_rate": 4.795314575534356e-05, "loss": 0.3252, "step": 339, "task_loss": 0.6892683506011963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23219440877437592, "epoch": 0.29, "learning_rate": 4.794710783721773e-05, "loss": 0.3117, "step": 340, "task_loss": 0.8769976496696472 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30014723539352417, "epoch": 0.29, "learning_rate": 4.79410699190919e-05, "loss": 0.3359, "step": 341, "task_loss": 0.37657225131988525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22581475973129272, "epoch": 0.29, "learning_rate": 4.793503200096607e-05, "loss": 0.2733, "step": 342, "task_loss": 0.2080300748348236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4027292728424072, "epoch": 0.29, "learning_rate": 4.792899408284024e-05, "loss": 0.4094, "step": 343, "task_loss": 0.6712422370910645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3460029363632202, "epoch": 0.29, "learning_rate": 4.792295616471441e-05, "loss": 0.3638, "step": 344, "task_loss": 0.9514648914337158 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27742084860801697, "epoch": 0.29, "learning_rate": 4.7916918246588575e-05, "loss": 0.3456, "step": 345, "task_loss": 1.0332428216934204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2522352337837219, "epoch": 0.29, "learning_rate": 4.791088032846275e-05, "loss": 0.2361, "step": 346, "task_loss": 0.3564586639404297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2361924797296524, "epoch": 0.29, "learning_rate": 4.790484241033692e-05, "loss": 0.2914, "step": 347, "task_loss": 0.4369828999042511 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24098509550094604, "epoch": 0.29, "learning_rate": 4.789880449221108e-05, "loss": 0.3358, "step": 348, "task_loss": 0.41343551874160767 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3081885874271393, "epoch": 0.29, "learning_rate": 4.789276657408526e-05, "loss": 0.3309, "step": 349, "task_loss": 0.05100584030151367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31949707865715027, "epoch": 0.3, "learning_rate": 4.788672865595943e-05, "loss": 0.4014, "step": 350, "task_loss": 0.18634140491485596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16327182948589325, "epoch": 0.3, "learning_rate": 4.78806907378336e-05, "loss": 0.3068, "step": 351, "task_loss": 0.5471976399421692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3204161524772644, "epoch": 0.3, "learning_rate": 4.7874652819707766e-05, "loss": 0.3572, "step": 352, "task_loss": 0.9759595990180969 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3595246374607086, "epoch": 0.3, "learning_rate": 4.786861490158194e-05, "loss": 0.4123, "step": 353, "task_loss": 0.3917081952095032 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2728806734085083, "epoch": 0.3, "learning_rate": 4.786257698345611e-05, "loss": 0.3699, "step": 354, "task_loss": 0.2402394860982895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2754240334033966, "epoch": 0.3, "learning_rate": 4.7856539065330274e-05, "loss": 0.3278, "step": 355, "task_loss": 0.15423518419265747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14029163122177124, "epoch": 0.3, "learning_rate": 4.785050114720445e-05, "loss": 0.3902, "step": 356, "task_loss": 0.8633174300193787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4380922317504883, "epoch": 0.3, "learning_rate": 4.7844463229078615e-05, "loss": 0.3097, "step": 357, "task_loss": 0.24112439155578613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3287860155105591, "epoch": 0.3, "learning_rate": 4.783842531095278e-05, "loss": 0.2758, "step": 358, "task_loss": 0.789581835269928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3519061803817749, "epoch": 0.3, "learning_rate": 4.7832387392826956e-05, "loss": 0.3612, "step": 359, "task_loss": 0.6147396564483643 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6181222200393677, "epoch": 0.3, "learning_rate": 4.7826349474701123e-05, "loss": 0.3759, "step": 360, "task_loss": 0.5621562004089355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39813950657844543, "epoch": 0.3, "learning_rate": 4.782031155657529e-05, "loss": 0.4228, "step": 361, "task_loss": 1.0186234712600708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4485042691230774, "epoch": 0.31, "learning_rate": 4.7814273638449465e-05, "loss": 0.3477, "step": 362, "task_loss": 0.8557292819023132 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.12397646903991699, "epoch": 0.31, "learning_rate": 4.780823572032364e-05, "loss": 0.3689, "step": 363, "task_loss": 0.6793264150619507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3271588087081909, "epoch": 0.31, "learning_rate": 4.7802197802197806e-05, "loss": 0.4006, "step": 364, "task_loss": 1.4454721212387085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3347281217575073, "epoch": 0.31, "learning_rate": 4.779615988407197e-05, "loss": 0.3006, "step": 365, "task_loss": 0.08478929102420807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.10874610394239426, "epoch": 0.31, "learning_rate": 4.779012196594615e-05, "loss": 0.3392, "step": 366, "task_loss": 0.298480749130249 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47120529413223267, "epoch": 0.31, "learning_rate": 4.7784084047820314e-05, "loss": 0.4524, "step": 367, "task_loss": 0.9685383439064026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3274350166320801, "epoch": 0.31, "learning_rate": 4.777804612969448e-05, "loss": 0.3685, "step": 368, "task_loss": 0.6706613302230835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3334848880767822, "epoch": 0.31, "learning_rate": 4.7772008211568655e-05, "loss": 0.3914, "step": 369, "task_loss": 0.2539215385913849 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5926135182380676, "epoch": 0.31, "learning_rate": 4.776597029344282e-05, "loss": 0.4606, "step": 370, "task_loss": 0.6668665409088135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37934818863868713, "epoch": 0.31, "learning_rate": 4.775993237531699e-05, "loss": 0.2906, "step": 371, "task_loss": 0.030765190720558167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17070582509040833, "epoch": 0.31, "learning_rate": 4.7753894457191163e-05, "loss": 0.3119, "step": 372, "task_loss": 0.5617129802703857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19154563546180725, "epoch": 0.32, "learning_rate": 4.774785653906533e-05, "loss": 0.2885, "step": 373, "task_loss": 0.3278349041938782 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.12602773308753967, "epoch": 0.32, "learning_rate": 4.7741818620939505e-05, "loss": 0.331, "step": 374, "task_loss": 0.08748903125524521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26439401507377625, "epoch": 0.32, "learning_rate": 4.773578070281367e-05, "loss": 0.3115, "step": 375, "task_loss": 0.3877311646938324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38234806060791016, "epoch": 0.32, "learning_rate": 4.772974278468784e-05, "loss": 0.342, "step": 376, "task_loss": 0.30214378237724304 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2441001534461975, "epoch": 0.32, "learning_rate": 4.772370486656201e-05, "loss": 0.345, "step": 377, "task_loss": 0.33088675141334534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34262794256210327, "epoch": 0.32, "learning_rate": 4.771766694843618e-05, "loss": 0.2664, "step": 378, "task_loss": 0.3423719108104706 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.372978150844574, "epoch": 0.32, "learning_rate": 4.7711629030310354e-05, "loss": 0.3085, "step": 379, "task_loss": 0.1712331622838974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.264641672372818, "epoch": 0.32, "learning_rate": 4.770559111218452e-05, "loss": 0.2919, "step": 380, "task_loss": 0.6030244827270508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42999929189682007, "epoch": 0.32, "learning_rate": 4.769955319405869e-05, "loss": 0.3403, "step": 381, "task_loss": 0.6663162708282471 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38923925161361694, "epoch": 0.32, "learning_rate": 4.769351527593286e-05, "loss": 0.3191, "step": 382, "task_loss": 0.7168391346931458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42394357919692993, "epoch": 0.32, "learning_rate": 4.768747735780703e-05, "loss": 0.3547, "step": 383, "task_loss": 1.1611169576644897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33820244669914246, "epoch": 0.32, "learning_rate": 4.7681439439681204e-05, "loss": 0.3872, "step": 384, "task_loss": 0.31316763162612915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34335267543792725, "epoch": 0.33, "learning_rate": 4.767540152155537e-05, "loss": 0.4133, "step": 385, "task_loss": 0.7624664306640625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3195539116859436, "epoch": 0.33, "learning_rate": 4.766936360342954e-05, "loss": 0.3734, "step": 386, "task_loss": 0.3183731436729431 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3304663896560669, "epoch": 0.33, "learning_rate": 4.766332568530371e-05, "loss": 0.4013, "step": 387, "task_loss": 0.44702717661857605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4347894787788391, "epoch": 0.33, "learning_rate": 4.765728776717788e-05, "loss": 0.4846, "step": 388, "task_loss": 0.7946614027023315 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3409498631954193, "epoch": 0.33, "learning_rate": 4.7651249849052046e-05, "loss": 0.4137, "step": 389, "task_loss": 0.9257267117500305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4376046657562256, "epoch": 0.33, "learning_rate": 4.764521193092622e-05, "loss": 0.3217, "step": 390, "task_loss": 0.75620436668396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47691571712493896, "epoch": 0.33, "learning_rate": 4.763917401280039e-05, "loss": 0.2519, "step": 391, "task_loss": 0.6885778903961182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3625657260417938, "epoch": 0.33, "learning_rate": 4.7633136094674555e-05, "loss": 0.5033, "step": 392, "task_loss": 0.3627155125141144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26669323444366455, "epoch": 0.33, "learning_rate": 4.762709817654873e-05, "loss": 0.4066, "step": 393, "task_loss": 1.4566916227340698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30266720056533813, "epoch": 0.33, "learning_rate": 4.76210602584229e-05, "loss": 0.3726, "step": 394, "task_loss": 0.5834892392158508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21502426266670227, "epoch": 0.33, "learning_rate": 4.761502234029707e-05, "loss": 0.2828, "step": 395, "task_loss": 0.6627023220062256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3386573791503906, "epoch": 0.33, "learning_rate": 4.760898442217124e-05, "loss": 0.3402, "step": 396, "task_loss": 0.32269924879074097 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3740113377571106, "epoch": 0.34, "learning_rate": 4.760294650404541e-05, "loss": 0.3358, "step": 397, "task_loss": 0.4154449701309204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4196130037307739, "epoch": 0.34, "learning_rate": 4.759690858591958e-05, "loss": 0.4343, "step": 398, "task_loss": 0.5105347633361816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2441515177488327, "epoch": 0.34, "learning_rate": 4.7590870667793745e-05, "loss": 0.358, "step": 399, "task_loss": 0.4545292854309082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3539087772369385, "epoch": 0.34, "learning_rate": 4.758483274966792e-05, "loss": 0.2667, "step": 400, "task_loss": 0.9259809851646423 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47437068819999695, "epoch": 0.34, "learning_rate": 4.7578794831542086e-05, "loss": 0.4189, "step": 401, "task_loss": 1.0032349824905396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5229030847549438, "epoch": 0.34, "learning_rate": 4.7572756913416254e-05, "loss": 0.2971, "step": 402, "task_loss": 0.8462005853652954 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3757791817188263, "epoch": 0.34, "learning_rate": 4.756671899529043e-05, "loss": 0.2793, "step": 403, "task_loss": 0.1159130185842514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3542606830596924, "epoch": 0.34, "learning_rate": 4.75606810771646e-05, "loss": 0.3693, "step": 404, "task_loss": 0.8220679759979248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4766911566257477, "epoch": 0.34, "learning_rate": 4.755464315903876e-05, "loss": 0.3797, "step": 405, "task_loss": 0.518310010433197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18795335292816162, "epoch": 0.34, "learning_rate": 4.7548605240912936e-05, "loss": 0.3843, "step": 406, "task_loss": 0.39841240644454956 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3532783091068268, "epoch": 0.34, "learning_rate": 4.754256732278711e-05, "loss": 0.4428, "step": 407, "task_loss": 0.385682076215744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37540656328201294, "epoch": 0.34, "learning_rate": 4.753652940466127e-05, "loss": 0.3145, "step": 408, "task_loss": 0.7418531179428101 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25664040446281433, "epoch": 0.35, "learning_rate": 4.7530491486535444e-05, "loss": 0.3971, "step": 409, "task_loss": 0.14077523350715637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24709947407245636, "epoch": 0.35, "learning_rate": 4.752445356840962e-05, "loss": 0.3414, "step": 410, "task_loss": 0.06836795061826706 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44139283895492554, "epoch": 0.35, "learning_rate": 4.751841565028378e-05, "loss": 0.3348, "step": 411, "task_loss": 0.5904854536056519 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18676511943340302, "epoch": 0.35, "learning_rate": 4.751237773215795e-05, "loss": 0.3647, "step": 412, "task_loss": 0.00229077716358006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20302972197532654, "epoch": 0.35, "learning_rate": 4.7506339814032126e-05, "loss": 0.2833, "step": 413, "task_loss": 0.6538112163543701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1949908286333084, "epoch": 0.35, "learning_rate": 4.7500301895906294e-05, "loss": 0.4827, "step": 414, "task_loss": 1.1799983978271484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.643801212310791, "epoch": 0.35, "learning_rate": 4.749426397778046e-05, "loss": 0.5334, "step": 415, "task_loss": 0.4353008568286896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46924811601638794, "epoch": 0.35, "learning_rate": 4.7488226059654635e-05, "loss": 0.3695, "step": 416, "task_loss": 0.025507470592856407 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.403201162815094, "epoch": 0.35, "learning_rate": 4.74821881415288e-05, "loss": 0.4249, "step": 417, "task_loss": 0.214700847864151 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30432790517807007, "epoch": 0.35, "learning_rate": 4.747615022340297e-05, "loss": 0.3563, "step": 418, "task_loss": 0.5774965286254883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35530805587768555, "epoch": 0.35, "learning_rate": 4.747011230527714e-05, "loss": 0.333, "step": 419, "task_loss": 0.6179263591766357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26833927631378174, "epoch": 0.35, "learning_rate": 4.746407438715132e-05, "loss": 0.4482, "step": 420, "task_loss": 0.7877441644668579 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46565455198287964, "epoch": 0.36, "learning_rate": 4.745803646902548e-05, "loss": 0.401, "step": 421, "task_loss": 0.6897768378257751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15110769867897034, "epoch": 0.36, "learning_rate": 4.745199855089965e-05, "loss": 0.2966, "step": 422, "task_loss": 0.06402797996997833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3630506098270416, "epoch": 0.36, "learning_rate": 4.7445960632773825e-05, "loss": 0.3753, "step": 423, "task_loss": 0.6175591945648193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32583174109458923, "epoch": 0.36, "learning_rate": 4.743992271464799e-05, "loss": 0.341, "step": 424, "task_loss": 0.8466601371765137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2001602053642273, "epoch": 0.36, "learning_rate": 4.743388479652216e-05, "loss": 0.2579, "step": 425, "task_loss": 0.07614468038082123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33632713556289673, "epoch": 0.36, "learning_rate": 4.7427846878396334e-05, "loss": 0.4204, "step": 426, "task_loss": 1.6080766916275024 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27337777614593506, "epoch": 0.36, "learning_rate": 4.74218089602705e-05, "loss": 0.3174, "step": 427, "task_loss": 1.8618650436401367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30001145601272583, "epoch": 0.36, "learning_rate": 4.741577104214467e-05, "loss": 0.405, "step": 428, "task_loss": 1.3293850421905518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19100527465343475, "epoch": 0.36, "learning_rate": 4.740973312401884e-05, "loss": 0.3861, "step": 429, "task_loss": 0.8412354588508606 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1726417988538742, "epoch": 0.36, "learning_rate": 4.740369520589301e-05, "loss": 0.3602, "step": 430, "task_loss": 1.2546541690826416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27611637115478516, "epoch": 0.36, "learning_rate": 4.7397657287767176e-05, "loss": 0.4678, "step": 431, "task_loss": 0.7354687452316284 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30049943923950195, "epoch": 0.36, "learning_rate": 4.739161936964135e-05, "loss": 0.3634, "step": 432, "task_loss": 0.47865769267082214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40666812658309937, "epoch": 0.37, "learning_rate": 4.738558145151552e-05, "loss": 0.3142, "step": 433, "task_loss": 0.4740898907184601 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28262925148010254, "epoch": 0.37, "learning_rate": 4.737954353338969e-05, "loss": 0.3382, "step": 434, "task_loss": 0.7306826114654541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2715093195438385, "epoch": 0.37, "learning_rate": 4.737350561526386e-05, "loss": 0.2396, "step": 435, "task_loss": 0.12085134536027908 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.495231032371521, "epoch": 0.37, "learning_rate": 4.736746769713803e-05, "loss": 0.3749, "step": 436, "task_loss": 0.5271447896957397 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2588498592376709, "epoch": 0.37, "learning_rate": 4.73614297790122e-05, "loss": 0.3323, "step": 437, "task_loss": 1.2773017883300781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3426035940647125, "epoch": 0.37, "learning_rate": 4.735539186088637e-05, "loss": 0.3988, "step": 438, "task_loss": 1.2968825101852417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3027150630950928, "epoch": 0.37, "learning_rate": 4.734935394276054e-05, "loss": 0.3351, "step": 439, "task_loss": 0.8464741110801697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28821468353271484, "epoch": 0.37, "learning_rate": 4.734331602463471e-05, "loss": 0.4304, "step": 440, "task_loss": 0.5745480060577393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22773291170597076, "epoch": 0.37, "learning_rate": 4.7337278106508875e-05, "loss": 0.3224, "step": 441, "task_loss": 0.12265873700380325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2685242295265198, "epoch": 0.37, "learning_rate": 4.733124018838305e-05, "loss": 0.2533, "step": 442, "task_loss": 0.9167185425758362 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3029676675796509, "epoch": 0.37, "learning_rate": 4.7325202270257216e-05, "loss": 0.3903, "step": 443, "task_loss": 0.4724733829498291 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34784698486328125, "epoch": 0.38, "learning_rate": 4.731916435213139e-05, "loss": 0.4061, "step": 444, "task_loss": 0.4826476573944092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4083378314971924, "epoch": 0.38, "learning_rate": 4.731312643400556e-05, "loss": 0.4403, "step": 445, "task_loss": 0.7389659285545349 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19734838604927063, "epoch": 0.38, "learning_rate": 4.7307088515879725e-05, "loss": 0.2514, "step": 446, "task_loss": 0.36828845739364624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2308603823184967, "epoch": 0.38, "learning_rate": 4.73010505977539e-05, "loss": 0.3262, "step": 447, "task_loss": 0.8120452165603638 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22636263072490692, "epoch": 0.38, "learning_rate": 4.7295012679628066e-05, "loss": 0.5631, "step": 448, "task_loss": 0.8329577445983887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.435597687959671, "epoch": 0.38, "learning_rate": 4.728897476150223e-05, "loss": 0.4089, "step": 449, "task_loss": 1.5263049602508545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20194268226623535, "epoch": 0.38, "learning_rate": 4.728293684337641e-05, "loss": 0.3599, "step": 450, "task_loss": 0.2185748815536499 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18315492570400238, "epoch": 0.38, "learning_rate": 4.7276898925250574e-05, "loss": 0.2685, "step": 451, "task_loss": 0.3106297552585602 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1951131373643875, "epoch": 0.38, "learning_rate": 4.727086100712475e-05, "loss": 0.2201, "step": 452, "task_loss": 0.11870678514242172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19492274522781372, "epoch": 0.38, "learning_rate": 4.7264823088998915e-05, "loss": 0.3888, "step": 453, "task_loss": 0.22582919895648956 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2240012139081955, "epoch": 0.38, "learning_rate": 4.725878517087309e-05, "loss": 0.3901, "step": 454, "task_loss": 0.5290884375572205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18829910457134247, "epoch": 0.38, "learning_rate": 4.7252747252747257e-05, "loss": 0.2296, "step": 455, "task_loss": 0.753002941608429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2122923731803894, "epoch": 0.39, "learning_rate": 4.7246709334621424e-05, "loss": 0.307, "step": 456, "task_loss": 0.0786176398396492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30417200922966003, "epoch": 0.39, "learning_rate": 4.72406714164956e-05, "loss": 0.2403, "step": 457, "task_loss": 0.3950548768043518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14613334834575653, "epoch": 0.39, "learning_rate": 4.7234633498369765e-05, "loss": 0.4295, "step": 458, "task_loss": 0.9901432991027832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19673016667366028, "epoch": 0.39, "learning_rate": 4.722859558024393e-05, "loss": 0.3662, "step": 459, "task_loss": 0.9414348006248474 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2953929305076599, "epoch": 0.39, "learning_rate": 4.7222557662118106e-05, "loss": 0.4012, "step": 460, "task_loss": 0.8229372501373291 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2553478181362152, "epoch": 0.39, "learning_rate": 4.721651974399227e-05, "loss": 0.3498, "step": 461, "task_loss": 1.105999231338501 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30003100633621216, "epoch": 0.39, "learning_rate": 4.721048182586644e-05, "loss": 0.3241, "step": 462, "task_loss": 0.20932143926620483 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29439473152160645, "epoch": 0.39, "learning_rate": 4.7204443907740614e-05, "loss": 0.3477, "step": 463, "task_loss": 0.3300442397594452 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6319240927696228, "epoch": 0.39, "learning_rate": 4.719840598961479e-05, "loss": 0.3711, "step": 464, "task_loss": 0.45971715450286865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37110820412635803, "epoch": 0.39, "learning_rate": 4.719236807148895e-05, "loss": 0.3154, "step": 465, "task_loss": 0.47261321544647217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3097112774848938, "epoch": 0.39, "learning_rate": 4.718633015336312e-05, "loss": 0.2648, "step": 466, "task_loss": 0.5097870230674744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36473336815834045, "epoch": 0.39, "learning_rate": 4.7180292235237297e-05, "loss": 0.2702, "step": 467, "task_loss": 0.4184640049934387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46796947717666626, "epoch": 0.4, "learning_rate": 4.7174254317111464e-05, "loss": 0.3551, "step": 468, "task_loss": 0.5642027258872986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16709420084953308, "epoch": 0.4, "learning_rate": 4.716821639898563e-05, "loss": 0.2777, "step": 469, "task_loss": 0.03220021724700928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5440744161605835, "epoch": 0.4, "learning_rate": 4.7162178480859805e-05, "loss": 0.4067, "step": 470, "task_loss": 0.8898472189903259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37732553482055664, "epoch": 0.4, "learning_rate": 4.715614056273397e-05, "loss": 0.3477, "step": 471, "task_loss": 0.9021671414375305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3206155598163605, "epoch": 0.4, "learning_rate": 4.715010264460814e-05, "loss": 0.3562, "step": 472, "task_loss": 0.9387491345405579 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34356531500816345, "epoch": 0.4, "learning_rate": 4.714406472648231e-05, "loss": 0.3109, "step": 473, "task_loss": 0.5287536978721619 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3042271137237549, "epoch": 0.4, "learning_rate": 4.713802680835648e-05, "loss": 0.2381, "step": 474, "task_loss": 0.3828946650028229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44171178340911865, "epoch": 0.4, "learning_rate": 4.713198889023065e-05, "loss": 0.3581, "step": 475, "task_loss": 0.5084564089775085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21827548742294312, "epoch": 0.4, "learning_rate": 4.712595097210482e-05, "loss": 0.3171, "step": 476, "task_loss": 0.6665694713592529 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22156867384910583, "epoch": 0.4, "learning_rate": 4.7119913053978996e-05, "loss": 0.3437, "step": 477, "task_loss": 0.12441891431808472 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4872015714645386, "epoch": 0.4, "learning_rate": 4.7113875135853156e-05, "loss": 0.432, "step": 478, "task_loss": 1.1194671392440796 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26371294260025024, "epoch": 0.4, "learning_rate": 4.710783721772733e-05, "loss": 0.4232, "step": 479, "task_loss": 1.1028268337249756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.283963680267334, "epoch": 0.41, "learning_rate": 4.7101799299601504e-05, "loss": 0.3808, "step": 480, "task_loss": 0.8595671653747559 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4099663496017456, "epoch": 0.41, "learning_rate": 4.7095761381475664e-05, "loss": 0.3359, "step": 481, "task_loss": 1.1608103513717651 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3045732080936432, "epoch": 0.41, "learning_rate": 4.708972346334984e-05, "loss": 0.3399, "step": 482, "task_loss": 0.3181804418563843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34263813495635986, "epoch": 0.41, "learning_rate": 4.708368554522401e-05, "loss": 0.32, "step": 483, "task_loss": 0.6213028430938721 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2232375591993332, "epoch": 0.41, "learning_rate": 4.707764762709818e-05, "loss": 0.2959, "step": 484, "task_loss": 0.37370654940605164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2694361209869385, "epoch": 0.41, "learning_rate": 4.7071609708972347e-05, "loss": 0.3881, "step": 485, "task_loss": 1.0189025402069092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2621615529060364, "epoch": 0.41, "learning_rate": 4.706557179084652e-05, "loss": 0.378, "step": 486, "task_loss": 0.8326951265335083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19903703033924103, "epoch": 0.41, "learning_rate": 4.705953387272069e-05, "loss": 0.2651, "step": 487, "task_loss": 0.6074106693267822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2726070284843445, "epoch": 0.41, "learning_rate": 4.7053495954594855e-05, "loss": 0.2451, "step": 488, "task_loss": 0.2530330419540405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27715301513671875, "epoch": 0.41, "learning_rate": 4.704745803646903e-05, "loss": 0.3208, "step": 489, "task_loss": 0.4281701445579529 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38308075070381165, "epoch": 0.41, "learning_rate": 4.7041420118343196e-05, "loss": 0.4538, "step": 490, "task_loss": 0.2543697953224182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3218812346458435, "epoch": 0.41, "learning_rate": 4.703538220021736e-05, "loss": 0.3782, "step": 491, "task_loss": 0.6149786710739136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5539299249649048, "epoch": 0.42, "learning_rate": 4.702934428209154e-05, "loss": 0.4913, "step": 492, "task_loss": 0.5124923586845398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5299365520477295, "epoch": 0.42, "learning_rate": 4.702330636396571e-05, "loss": 0.3712, "step": 493, "task_loss": 0.6752091646194458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34560248255729675, "epoch": 0.42, "learning_rate": 4.701726844583988e-05, "loss": 0.4371, "step": 494, "task_loss": 0.14114762842655182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33097970485687256, "epoch": 0.42, "learning_rate": 4.7011230527714045e-05, "loss": 0.4242, "step": 495, "task_loss": 0.2937622666358948 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36671313643455505, "epoch": 0.42, "learning_rate": 4.700519260958822e-05, "loss": 0.3409, "step": 496, "task_loss": 0.9777697324752808 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5165556073188782, "epoch": 0.42, "learning_rate": 4.6999154691462387e-05, "loss": 0.3776, "step": 497, "task_loss": 0.45647016167640686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44569462537765503, "epoch": 0.42, "learning_rate": 4.6993116773336554e-05, "loss": 0.4087, "step": 498, "task_loss": 0.5437862873077393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23247092962265015, "epoch": 0.42, "learning_rate": 4.698707885521073e-05, "loss": 0.2865, "step": 499, "task_loss": 0.5779015421867371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36501020193099976, "epoch": 0.42, "learning_rate": 4.6981040937084895e-05, "loss": 0.2977, "step": 500, "task_loss": 0.02872415818274021 }, { "epoch": 0.42, "eval_accuracy": 0.9111683168316832, "eval_loss": 0.19494682550430298, "eval_runtime": 338.708, "eval_samples_per_second": 74.548, "eval_steps_per_second": 0.585, "step": 500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3552286624908447, "epoch": 0.42, "learning_rate": 4.697500301895906e-05, "loss": 0.3759, "step": 501, "task_loss": 1.1596523523330688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29694682359695435, "epoch": 0.42, "learning_rate": 4.6968965100833236e-05, "loss": 0.341, "step": 502, "task_loss": 0.24640515446662903 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2870936989784241, "epoch": 0.42, "learning_rate": 4.69629271827074e-05, "loss": 0.3098, "step": 503, "task_loss": 0.6115170121192932 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26924842596054077, "epoch": 0.43, "learning_rate": 4.695688926458158e-05, "loss": 0.4013, "step": 504, "task_loss": 0.8527316451072693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3132873475551605, "epoch": 0.43, "learning_rate": 4.6950851346455744e-05, "loss": 0.2732, "step": 505, "task_loss": 0.551632285118103 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20017537474632263, "epoch": 0.43, "learning_rate": 4.694481342832991e-05, "loss": 0.2497, "step": 506, "task_loss": 0.4739929139614105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4083116352558136, "epoch": 0.43, "learning_rate": 4.6938775510204086e-05, "loss": 0.2903, "step": 507, "task_loss": 0.42030954360961914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.12615306675434113, "epoch": 0.43, "learning_rate": 4.693273759207825e-05, "loss": 0.2107, "step": 508, "task_loss": 0.1609063297510147 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29174333810806274, "epoch": 0.43, "learning_rate": 4.692669967395243e-05, "loss": 0.3058, "step": 509, "task_loss": 0.7792858481407166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2843620181083679, "epoch": 0.43, "learning_rate": 4.6920661755826594e-05, "loss": 0.3742, "step": 510, "task_loss": 0.4710130989551544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30938857793807983, "epoch": 0.43, "learning_rate": 4.691462383770076e-05, "loss": 0.4092, "step": 511, "task_loss": 0.7510309219360352 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23981353640556335, "epoch": 0.43, "learning_rate": 4.6908585919574935e-05, "loss": 0.4148, "step": 512, "task_loss": 0.6449210047721863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4427306056022644, "epoch": 0.43, "learning_rate": 4.69025480014491e-05, "loss": 0.3666, "step": 513, "task_loss": 0.3347684442996979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21351030468940735, "epoch": 0.43, "learning_rate": 4.6896510083323276e-05, "loss": 0.3193, "step": 514, "task_loss": 1.0636972188949585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2566937804222107, "epoch": 0.44, "learning_rate": 4.689047216519744e-05, "loss": 0.3709, "step": 515, "task_loss": 0.7252180576324463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35753583908081055, "epoch": 0.44, "learning_rate": 4.688443424707161e-05, "loss": 0.3494, "step": 516, "task_loss": 0.5258875489234924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5033993721008301, "epoch": 0.44, "learning_rate": 4.6878396328945784e-05, "loss": 0.3532, "step": 517, "task_loss": 1.105150818824768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17686602473258972, "epoch": 0.44, "learning_rate": 4.687235841081995e-05, "loss": 0.2566, "step": 518, "task_loss": 0.40853849053382874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32396629452705383, "epoch": 0.44, "learning_rate": 4.686632049269412e-05, "loss": 0.3318, "step": 519, "task_loss": 1.226879358291626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4283577501773834, "epoch": 0.44, "learning_rate": 4.686028257456829e-05, "loss": 0.2728, "step": 520, "task_loss": 0.18439847230911255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5258373022079468, "epoch": 0.44, "learning_rate": 4.685424465644246e-05, "loss": 0.4484, "step": 521, "task_loss": 0.3932799994945526 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2439127266407013, "epoch": 0.44, "learning_rate": 4.684820673831663e-05, "loss": 0.3316, "step": 522, "task_loss": 0.43680280447006226 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2758333086967468, "epoch": 0.44, "learning_rate": 4.68421688201908e-05, "loss": 0.2995, "step": 523, "task_loss": 0.6799713969230652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.575778067111969, "epoch": 0.44, "learning_rate": 4.6836130902064975e-05, "loss": 0.4034, "step": 524, "task_loss": 0.8700759410858154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23655471205711365, "epoch": 0.44, "learning_rate": 4.683009298393914e-05, "loss": 0.3195, "step": 525, "task_loss": 0.4206683039665222 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18472610414028168, "epoch": 0.44, "learning_rate": 4.682405506581331e-05, "loss": 0.3069, "step": 526, "task_loss": 0.5916554927825928 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19131658971309662, "epoch": 0.45, "learning_rate": 4.6818017147687483e-05, "loss": 0.298, "step": 527, "task_loss": 0.8907945156097412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3965601325035095, "epoch": 0.45, "learning_rate": 4.681197922956165e-05, "loss": 0.4328, "step": 528, "task_loss": 0.2660832405090332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24752244353294373, "epoch": 0.45, "learning_rate": 4.680594131143582e-05, "loss": 0.2761, "step": 529, "task_loss": 0.1341249942779541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.11898336559534073, "epoch": 0.45, "learning_rate": 4.679990339330999e-05, "loss": 0.3044, "step": 530, "task_loss": 0.5121662616729736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45424357056617737, "epoch": 0.45, "learning_rate": 4.679386547518416e-05, "loss": 0.3614, "step": 531, "task_loss": 0.2839154601097107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3627973198890686, "epoch": 0.45, "learning_rate": 4.6787827557058326e-05, "loss": 0.4045, "step": 532, "task_loss": 0.6190841197967529 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5654903054237366, "epoch": 0.45, "learning_rate": 4.67817896389325e-05, "loss": 0.412, "step": 533, "task_loss": 0.34364384412765503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27257686853408813, "epoch": 0.45, "learning_rate": 4.677575172080667e-05, "loss": 0.3734, "step": 534, "task_loss": 1.0794323682785034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20569965243339539, "epoch": 0.45, "learning_rate": 4.6769713802680834e-05, "loss": 0.2292, "step": 535, "task_loss": 0.13309672474861145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2676234543323517, "epoch": 0.45, "learning_rate": 4.676367588455501e-05, "loss": 0.3371, "step": 536, "task_loss": 0.4787544310092926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3429715037345886, "epoch": 0.45, "learning_rate": 4.675763796642918e-05, "loss": 0.4323, "step": 537, "task_loss": 1.0065019130706787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2396705150604248, "epoch": 0.45, "learning_rate": 4.675160004830334e-05, "loss": 0.2787, "step": 538, "task_loss": 0.6923638582229614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40023157000541687, "epoch": 0.46, "learning_rate": 4.674556213017752e-05, "loss": 0.3016, "step": 539, "task_loss": 0.7968195676803589 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2712099850177765, "epoch": 0.46, "learning_rate": 4.673952421205169e-05, "loss": 0.3879, "step": 540, "task_loss": 0.409343421459198 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31481483578681946, "epoch": 0.46, "learning_rate": 4.673348629392585e-05, "loss": 0.3544, "step": 541, "task_loss": 0.6697490215301514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2298501580953598, "epoch": 0.46, "learning_rate": 4.6727448375800025e-05, "loss": 0.2573, "step": 542, "task_loss": 0.6586454510688782 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3335941731929779, "epoch": 0.46, "learning_rate": 4.67214104576742e-05, "loss": 0.2974, "step": 543, "task_loss": 1.0023521184921265 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46105095744132996, "epoch": 0.46, "learning_rate": 4.6715372539548366e-05, "loss": 0.363, "step": 544, "task_loss": 0.6028873920440674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3821551501750946, "epoch": 0.46, "learning_rate": 4.670933462142253e-05, "loss": 0.3881, "step": 545, "task_loss": 0.9498618245124817 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1347273886203766, "epoch": 0.46, "learning_rate": 4.670329670329671e-05, "loss": 0.2558, "step": 546, "task_loss": 0.15808208286762238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27748334407806396, "epoch": 0.46, "learning_rate": 4.6697258785170875e-05, "loss": 0.331, "step": 547, "task_loss": 0.5437588691711426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31439855694770813, "epoch": 0.46, "learning_rate": 4.669122086704504e-05, "loss": 0.2868, "step": 548, "task_loss": 0.6029009222984314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36170822381973267, "epoch": 0.46, "learning_rate": 4.6685182948919216e-05, "loss": 0.4417, "step": 549, "task_loss": 1.6580506563186646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23689739406108856, "epoch": 0.46, "learning_rate": 4.667914503079339e-05, "loss": 0.3143, "step": 550, "task_loss": 0.8231652975082397 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19411659240722656, "epoch": 0.47, "learning_rate": 4.667310711266755e-05, "loss": 0.2952, "step": 551, "task_loss": 0.8502408862113953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17974667251110077, "epoch": 0.47, "learning_rate": 4.6667069194541724e-05, "loss": 0.3763, "step": 552, "task_loss": 1.0208184719085693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15332043170928955, "epoch": 0.47, "learning_rate": 4.66610312764159e-05, "loss": 0.3972, "step": 553, "task_loss": 1.0831626653671265 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16559112071990967, "epoch": 0.47, "learning_rate": 4.665499335829006e-05, "loss": 0.3241, "step": 554, "task_loss": 0.13271448016166687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2002086192369461, "epoch": 0.47, "learning_rate": 4.664895544016423e-05, "loss": 0.3304, "step": 555, "task_loss": 0.3389005661010742 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2827780246734619, "epoch": 0.47, "learning_rate": 4.6642917522038406e-05, "loss": 0.2844, "step": 556, "task_loss": 0.3220231235027313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35322192311286926, "epoch": 0.47, "learning_rate": 4.6636879603912573e-05, "loss": 0.3659, "step": 557, "task_loss": 1.4571306705474854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1761866956949234, "epoch": 0.47, "learning_rate": 4.663084168578674e-05, "loss": 0.2108, "step": 558, "task_loss": 0.1433708518743515 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4945336580276489, "epoch": 0.47, "learning_rate": 4.6624803767660915e-05, "loss": 0.3537, "step": 559, "task_loss": 0.603844165802002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3818495571613312, "epoch": 0.47, "learning_rate": 4.661876584953508e-05, "loss": 0.3609, "step": 560, "task_loss": 0.8040283918380737 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4740597903728485, "epoch": 0.47, "learning_rate": 4.661272793140925e-05, "loss": 0.3636, "step": 561, "task_loss": 0.45497509837150574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46093040704727173, "epoch": 0.47, "learning_rate": 4.660669001328342e-05, "loss": 0.4507, "step": 562, "task_loss": 0.8263192176818848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40767866373062134, "epoch": 0.48, "learning_rate": 4.660065209515759e-05, "loss": 0.3164, "step": 563, "task_loss": 0.8162437081336975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33189451694488525, "epoch": 0.48, "learning_rate": 4.659461417703176e-05, "loss": 0.294, "step": 564, "task_loss": 1.0901103019714355 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36081790924072266, "epoch": 0.48, "learning_rate": 4.658857625890593e-05, "loss": 0.323, "step": 565, "task_loss": 0.41519632935523987 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2105311155319214, "epoch": 0.48, "learning_rate": 4.6582538340780105e-05, "loss": 0.2567, "step": 566, "task_loss": 0.1116391196846962 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24808262288570404, "epoch": 0.48, "learning_rate": 4.657650042265427e-05, "loss": 0.4331, "step": 567, "task_loss": 1.3634008169174194 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15797454118728638, "epoch": 0.48, "learning_rate": 4.657046250452844e-05, "loss": 0.4287, "step": 568, "task_loss": 0.07788126170635223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.536435604095459, "epoch": 0.48, "learning_rate": 4.6564424586402614e-05, "loss": 0.4089, "step": 569, "task_loss": 0.877366840839386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.454207181930542, "epoch": 0.48, "learning_rate": 4.655838666827678e-05, "loss": 0.3116, "step": 570, "task_loss": 0.3642159104347229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20970717072486877, "epoch": 0.48, "learning_rate": 4.655234875015095e-05, "loss": 0.3425, "step": 571, "task_loss": 0.6273272633552551 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30841565132141113, "epoch": 0.48, "learning_rate": 4.654631083202512e-05, "loss": 0.2941, "step": 572, "task_loss": 0.1949205994606018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43708956241607666, "epoch": 0.48, "learning_rate": 4.654027291389929e-05, "loss": 0.3344, "step": 573, "task_loss": 0.8806980848312378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21233753859996796, "epoch": 0.48, "learning_rate": 4.6534234995773456e-05, "loss": 0.3196, "step": 574, "task_loss": 0.7414767146110535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2978823184967041, "epoch": 0.49, "learning_rate": 4.652819707764763e-05, "loss": 0.4029, "step": 575, "task_loss": 0.48946142196655273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4036892056465149, "epoch": 0.49, "learning_rate": 4.65221591595218e-05, "loss": 0.3856, "step": 576, "task_loss": 0.436568945646286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2572465240955353, "epoch": 0.49, "learning_rate": 4.651612124139597e-05, "loss": 0.393, "step": 577, "task_loss": 1.0481715202331543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3467211127281189, "epoch": 0.49, "learning_rate": 4.651008332327014e-05, "loss": 0.372, "step": 578, "task_loss": 0.842318058013916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30919787287712097, "epoch": 0.49, "learning_rate": 4.6504045405144306e-05, "loss": 0.4773, "step": 579, "task_loss": 1.1685787439346313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2926180064678192, "epoch": 0.49, "learning_rate": 4.649800748701848e-05, "loss": 0.3399, "step": 580, "task_loss": 0.8706581592559814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23800015449523926, "epoch": 0.49, "learning_rate": 4.649196956889265e-05, "loss": 0.327, "step": 581, "task_loss": 0.5887655019760132 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5023128986358643, "epoch": 0.49, "learning_rate": 4.648593165076682e-05, "loss": 0.4798, "step": 582, "task_loss": 1.2295455932617188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24959447979927063, "epoch": 0.49, "learning_rate": 4.647989373264099e-05, "loss": 0.3407, "step": 583, "task_loss": 0.19351419806480408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1406758725643158, "epoch": 0.49, "learning_rate": 4.6473855814515155e-05, "loss": 0.3282, "step": 584, "task_loss": 1.069644808769226 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3988448977470398, "epoch": 0.49, "learning_rate": 4.646781789638933e-05, "loss": 0.4157, "step": 585, "task_loss": 1.2030136585235596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21727235615253448, "epoch": 0.5, "learning_rate": 4.6461779978263496e-05, "loss": 0.351, "step": 586, "task_loss": 0.13985687494277954 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32491010427474976, "epoch": 0.5, "learning_rate": 4.645574206013767e-05, "loss": 0.3861, "step": 587, "task_loss": 0.5897377729415894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.526580274105072, "epoch": 0.5, "learning_rate": 4.644970414201184e-05, "loss": 0.3875, "step": 588, "task_loss": 0.8688368201255798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19158503413200378, "epoch": 0.5, "learning_rate": 4.6443666223886005e-05, "loss": 0.3536, "step": 589, "task_loss": 0.3490241467952728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35543888807296753, "epoch": 0.5, "learning_rate": 4.643762830576018e-05, "loss": 0.3452, "step": 590, "task_loss": 0.9998995661735535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1599348485469818, "epoch": 0.5, "learning_rate": 4.6431590387634346e-05, "loss": 0.3203, "step": 591, "task_loss": 0.18656522035598755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2544928789138794, "epoch": 0.5, "learning_rate": 4.642555246950851e-05, "loss": 0.3223, "step": 592, "task_loss": 0.25684264302253723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2261628806591034, "epoch": 0.5, "learning_rate": 4.641951455138269e-05, "loss": 0.2666, "step": 593, "task_loss": 0.2724857032299042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14076194167137146, "epoch": 0.5, "learning_rate": 4.6413476633256854e-05, "loss": 0.2639, "step": 594, "task_loss": 0.298418790102005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30404239892959595, "epoch": 0.5, "learning_rate": 4.640743871513102e-05, "loss": 0.3223, "step": 595, "task_loss": 0.3522747755050659 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.308472216129303, "epoch": 0.5, "learning_rate": 4.6401400797005195e-05, "loss": 0.3031, "step": 596, "task_loss": 1.0812458992004395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22775940597057343, "epoch": 0.5, "learning_rate": 4.639536287887937e-05, "loss": 0.3519, "step": 597, "task_loss": 0.32643061876296997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3233962059020996, "epoch": 0.51, "learning_rate": 4.638932496075353e-05, "loss": 0.4218, "step": 598, "task_loss": 0.7972854375839233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24168744683265686, "epoch": 0.51, "learning_rate": 4.6383287042627704e-05, "loss": 0.3266, "step": 599, "task_loss": 0.3753623366355896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4203510582447052, "epoch": 0.51, "learning_rate": 4.637724912450188e-05, "loss": 0.4241, "step": 600, "task_loss": 1.0043474435806274 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23040008544921875, "epoch": 0.51, "learning_rate": 4.6371211206376045e-05, "loss": 0.4598, "step": 601, "task_loss": 1.4317598342895508 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2298530638217926, "epoch": 0.51, "learning_rate": 4.636517328825021e-05, "loss": 0.3663, "step": 602, "task_loss": 0.6105636358261108 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4078867733478546, "epoch": 0.51, "learning_rate": 4.6359135370124386e-05, "loss": 0.2485, "step": 603, "task_loss": 0.3292267322540283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5903968811035156, "epoch": 0.51, "learning_rate": 4.635309745199855e-05, "loss": 0.3066, "step": 604, "task_loss": 1.089480996131897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29476842284202576, "epoch": 0.51, "learning_rate": 4.634705953387272e-05, "loss": 0.3085, "step": 605, "task_loss": 0.45623448491096497 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14264142513275146, "epoch": 0.51, "learning_rate": 4.6341021615746894e-05, "loss": 0.2667, "step": 606, "task_loss": 0.05452156066894531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4609898626804352, "epoch": 0.51, "learning_rate": 4.633498369762107e-05, "loss": 0.3163, "step": 607, "task_loss": 0.44029584527015686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6171891689300537, "epoch": 0.51, "learning_rate": 4.632894577949523e-05, "loss": 0.4315, "step": 608, "task_loss": 0.9165663719177246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3013846278190613, "epoch": 0.51, "learning_rate": 4.63229078613694e-05, "loss": 0.3415, "step": 609, "task_loss": 0.63967365026474 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2746434807777405, "epoch": 0.52, "learning_rate": 4.6316869943243576e-05, "loss": 0.3267, "step": 610, "task_loss": 0.5338064432144165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3651568293571472, "epoch": 0.52, "learning_rate": 4.631083202511774e-05, "loss": 0.3393, "step": 611, "task_loss": 1.079981803894043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23759052157402039, "epoch": 0.52, "learning_rate": 4.630479410699191e-05, "loss": 0.3861, "step": 612, "task_loss": 0.6698994636535645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5082151293754578, "epoch": 0.52, "learning_rate": 4.6298756188866085e-05, "loss": 0.451, "step": 613, "task_loss": 0.8963048458099365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2931552231311798, "epoch": 0.52, "learning_rate": 4.6292718270740245e-05, "loss": 0.3718, "step": 614, "task_loss": 1.26703941822052 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.284406453371048, "epoch": 0.52, "learning_rate": 4.628668035261442e-05, "loss": 0.4436, "step": 615, "task_loss": 1.153813362121582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.346671462059021, "epoch": 0.52, "learning_rate": 4.628064243448859e-05, "loss": 0.3822, "step": 616, "task_loss": 1.0603476762771606 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2777397632598877, "epoch": 0.52, "learning_rate": 4.627460451636276e-05, "loss": 0.3429, "step": 617, "task_loss": 1.0770553350448608 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5762258768081665, "epoch": 0.52, "learning_rate": 4.626856659823693e-05, "loss": 0.4253, "step": 618, "task_loss": 0.914618194103241 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4495653808116913, "epoch": 0.52, "learning_rate": 4.62625286801111e-05, "loss": 0.4085, "step": 619, "task_loss": 0.21080175042152405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27887922525405884, "epoch": 0.52, "learning_rate": 4.625649076198527e-05, "loss": 0.4165, "step": 620, "task_loss": 0.5042296648025513 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3461983799934387, "epoch": 0.52, "learning_rate": 4.6250452843859436e-05, "loss": 0.3263, "step": 621, "task_loss": 1.0598671436309814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32994264364242554, "epoch": 0.53, "learning_rate": 4.624441492573361e-05, "loss": 0.3668, "step": 622, "task_loss": 1.159314513206482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22066111862659454, "epoch": 0.53, "learning_rate": 4.6238377007607784e-05, "loss": 0.3031, "step": 623, "task_loss": 0.10674968361854553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43864771723747253, "epoch": 0.53, "learning_rate": 4.6232339089481944e-05, "loss": 0.396, "step": 624, "task_loss": 0.5357619524002075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4029865264892578, "epoch": 0.53, "learning_rate": 4.622630117135612e-05, "loss": 0.4925, "step": 625, "task_loss": 0.5318986177444458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.205038383603096, "epoch": 0.53, "learning_rate": 4.622026325323029e-05, "loss": 0.2959, "step": 626, "task_loss": 1.0308891534805298 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6130087375640869, "epoch": 0.53, "learning_rate": 4.621422533510446e-05, "loss": 0.4259, "step": 627, "task_loss": 0.47725486755371094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3744767904281616, "epoch": 0.53, "learning_rate": 4.6208187416978626e-05, "loss": 0.3554, "step": 628, "task_loss": 0.7097012996673584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2224428802728653, "epoch": 0.53, "learning_rate": 4.62021494988528e-05, "loss": 0.2753, "step": 629, "task_loss": 0.8521153926849365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4066242575645447, "epoch": 0.53, "learning_rate": 4.619611158072697e-05, "loss": 0.3714, "step": 630, "task_loss": 1.2141882181167603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3314343988895416, "epoch": 0.53, "learning_rate": 4.6190073662601135e-05, "loss": 0.2743, "step": 631, "task_loss": 0.4873604476451874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31800585985183716, "epoch": 0.53, "learning_rate": 4.618403574447531e-05, "loss": 0.3455, "step": 632, "task_loss": 0.9237346053123474 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.579411506652832, "epoch": 0.53, "learning_rate": 4.6177997826349476e-05, "loss": 0.3566, "step": 633, "task_loss": 0.38747143745422363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24865508079528809, "epoch": 0.54, "learning_rate": 4.617195990822364e-05, "loss": 0.3163, "step": 634, "task_loss": 0.8504195213317871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21511569619178772, "epoch": 0.54, "learning_rate": 4.616592199009782e-05, "loss": 0.3294, "step": 635, "task_loss": 0.5050124526023865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2974485158920288, "epoch": 0.54, "learning_rate": 4.6159884071971984e-05, "loss": 0.4025, "step": 636, "task_loss": 1.4122283458709717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32975679636001587, "epoch": 0.54, "learning_rate": 4.615384615384616e-05, "loss": 0.3319, "step": 637, "task_loss": 0.7846709489822388 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25379478931427, "epoch": 0.54, "learning_rate": 4.6147808235720325e-05, "loss": 0.3251, "step": 638, "task_loss": 1.0721712112426758 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4846215546131134, "epoch": 0.54, "learning_rate": 4.61417703175945e-05, "loss": 0.4351, "step": 639, "task_loss": 0.22067594528198242 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5852885246276855, "epoch": 0.54, "learning_rate": 4.6135732399468666e-05, "loss": 0.3507, "step": 640, "task_loss": 0.4556126296520233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.13487397134304047, "epoch": 0.54, "learning_rate": 4.6129694481342834e-05, "loss": 0.2611, "step": 641, "task_loss": 0.19986118376255035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27105093002319336, "epoch": 0.54, "learning_rate": 4.612365656321701e-05, "loss": 0.3615, "step": 642, "task_loss": 1.0428903102874756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2862633466720581, "epoch": 0.54, "learning_rate": 4.6117618645091175e-05, "loss": 0.4324, "step": 643, "task_loss": 1.042823314666748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17515414953231812, "epoch": 0.54, "learning_rate": 4.611158072696534e-05, "loss": 0.2985, "step": 644, "task_loss": 0.28693491220474243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24967364966869354, "epoch": 0.54, "learning_rate": 4.6105542808839516e-05, "loss": 0.3359, "step": 645, "task_loss": 0.8768274784088135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2654966115951538, "epoch": 0.55, "learning_rate": 4.609950489071368e-05, "loss": 0.3514, "step": 646, "task_loss": 0.2290060669183731 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6174731254577637, "epoch": 0.55, "learning_rate": 4.609346697258786e-05, "loss": 0.394, "step": 647, "task_loss": 0.34061920642852783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2054482102394104, "epoch": 0.55, "learning_rate": 4.6087429054462024e-05, "loss": 0.4386, "step": 648, "task_loss": 0.3258286118507385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40916475653648376, "epoch": 0.55, "learning_rate": 4.608139113633619e-05, "loss": 0.3831, "step": 649, "task_loss": 0.4465845227241516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3240598142147064, "epoch": 0.55, "learning_rate": 4.6075353218210365e-05, "loss": 0.3164, "step": 650, "task_loss": 0.9258898496627808 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.314564049243927, "epoch": 0.55, "learning_rate": 4.606931530008453e-05, "loss": 0.3354, "step": 651, "task_loss": 0.9715918898582458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24291005730628967, "epoch": 0.55, "learning_rate": 4.60632773819587e-05, "loss": 0.3064, "step": 652, "task_loss": 0.5996285080909729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24056079983711243, "epoch": 0.55, "learning_rate": 4.6057239463832874e-05, "loss": 0.4083, "step": 653, "task_loss": 0.1843840479850769 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3044561743736267, "epoch": 0.55, "learning_rate": 4.605120154570704e-05, "loss": 0.3245, "step": 654, "task_loss": 0.6563989520072937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5701591968536377, "epoch": 0.55, "learning_rate": 4.6045163627581215e-05, "loss": 0.3578, "step": 655, "task_loss": 0.16813500225543976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26367518305778503, "epoch": 0.55, "learning_rate": 4.603912570945538e-05, "loss": 0.4461, "step": 656, "task_loss": 0.5315459370613098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3030101954936981, "epoch": 0.56, "learning_rate": 4.6033087791329556e-05, "loss": 0.344, "step": 657, "task_loss": 0.7940163016319275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23225125670433044, "epoch": 0.56, "learning_rate": 4.602704987320372e-05, "loss": 0.3637, "step": 658, "task_loss": 0.5889531970024109 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33130282163619995, "epoch": 0.56, "learning_rate": 4.602101195507789e-05, "loss": 0.4226, "step": 659, "task_loss": 0.8525395393371582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20055104792118073, "epoch": 0.56, "learning_rate": 4.6014974036952064e-05, "loss": 0.4213, "step": 660, "task_loss": 0.3496905565261841 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5440434217453003, "epoch": 0.56, "learning_rate": 4.600893611882623e-05, "loss": 0.448, "step": 661, "task_loss": 0.896485447883606 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2198769897222519, "epoch": 0.56, "learning_rate": 4.60028982007004e-05, "loss": 0.3428, "step": 662, "task_loss": 0.4718659818172455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45973771810531616, "epoch": 0.56, "learning_rate": 4.599686028257457e-05, "loss": 0.3601, "step": 663, "task_loss": 0.14959101378917694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36007171869277954, "epoch": 0.56, "learning_rate": 4.599082236444874e-05, "loss": 0.367, "step": 664, "task_loss": 0.1878933608531952 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30151093006134033, "epoch": 0.56, "learning_rate": 4.598478444632291e-05, "loss": 0.3279, "step": 665, "task_loss": 0.5680833458900452 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44395750761032104, "epoch": 0.56, "learning_rate": 4.597874652819708e-05, "loss": 0.3443, "step": 666, "task_loss": 0.38879168033599854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3097415566444397, "epoch": 0.56, "learning_rate": 4.5972708610071255e-05, "loss": 0.3417, "step": 667, "task_loss": 0.5373480319976807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30059152841567993, "epoch": 0.56, "learning_rate": 4.5966670691945415e-05, "loss": 0.3592, "step": 668, "task_loss": 0.645464301109314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2942499816417694, "epoch": 0.57, "learning_rate": 4.596063277381959e-05, "loss": 0.3628, "step": 669, "task_loss": 0.9490315914154053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27622634172439575, "epoch": 0.57, "learning_rate": 4.595459485569376e-05, "loss": 0.2631, "step": 670, "task_loss": 0.3641386330127716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27426183223724365, "epoch": 0.57, "learning_rate": 4.5948556937567924e-05, "loss": 0.3301, "step": 671, "task_loss": 0.9640410542488098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33635398745536804, "epoch": 0.57, "learning_rate": 4.59425190194421e-05, "loss": 0.4058, "step": 672, "task_loss": 0.6574383974075317 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43457847833633423, "epoch": 0.57, "learning_rate": 4.593648110131627e-05, "loss": 0.2719, "step": 673, "task_loss": 1.1977999210357666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19407469034194946, "epoch": 0.57, "learning_rate": 4.593044318319044e-05, "loss": 0.295, "step": 674, "task_loss": 0.307859867811203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39448654651641846, "epoch": 0.57, "learning_rate": 4.5924405265064606e-05, "loss": 0.4649, "step": 675, "task_loss": 1.0258768796920776 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2316320538520813, "epoch": 0.57, "learning_rate": 4.591836734693878e-05, "loss": 0.4397, "step": 676, "task_loss": 0.6709340810775757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2632187008857727, "epoch": 0.57, "learning_rate": 4.591232942881295e-05, "loss": 0.3157, "step": 677, "task_loss": 0.28071120381355286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3112484812736511, "epoch": 0.57, "learning_rate": 4.5906291510687114e-05, "loss": 0.3235, "step": 678, "task_loss": 0.20828117430210114 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3335568308830261, "epoch": 0.57, "learning_rate": 4.590025359256129e-05, "loss": 0.3629, "step": 679, "task_loss": 0.5164096355438232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23583701252937317, "epoch": 0.57, "learning_rate": 4.589421567443546e-05, "loss": 0.2623, "step": 680, "task_loss": 0.7877901196479797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2872389256954193, "epoch": 0.58, "learning_rate": 4.588817775630962e-05, "loss": 0.3789, "step": 681, "task_loss": 0.12168869376182556 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14790436625480652, "epoch": 0.58, "learning_rate": 4.5882139838183797e-05, "loss": 0.3465, "step": 682, "task_loss": 0.34522756934165955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7403531670570374, "epoch": 0.58, "learning_rate": 4.587610192005797e-05, "loss": 0.4842, "step": 683, "task_loss": 0.6939802765846252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23989468812942505, "epoch": 0.58, "learning_rate": 4.587006400193213e-05, "loss": 0.2582, "step": 684, "task_loss": 0.6449297070503235 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20592886209487915, "epoch": 0.58, "learning_rate": 4.5864026083806305e-05, "loss": 0.3697, "step": 685, "task_loss": 0.3713332414627075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3903454840183258, "epoch": 0.58, "learning_rate": 4.585798816568048e-05, "loss": 0.2636, "step": 686, "task_loss": 0.19788767397403717 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.336739718914032, "epoch": 0.58, "learning_rate": 4.5851950247554646e-05, "loss": 0.3095, "step": 687, "task_loss": 1.0469329357147217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.170437291264534, "epoch": 0.58, "learning_rate": 4.584591232942881e-05, "loss": 0.2963, "step": 688, "task_loss": 0.7811930179595947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29482707381248474, "epoch": 0.58, "learning_rate": 4.583987441130299e-05, "loss": 0.3391, "step": 689, "task_loss": 0.28795668482780457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.12704573571681976, "epoch": 0.58, "learning_rate": 4.5833836493177154e-05, "loss": 0.2565, "step": 690, "task_loss": 0.26545649766921997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27358075976371765, "epoch": 0.58, "learning_rate": 4.582779857505132e-05, "loss": 0.3741, "step": 691, "task_loss": 0.47030144929885864 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40781766176223755, "epoch": 0.58, "learning_rate": 4.5821760656925496e-05, "loss": 0.37, "step": 692, "task_loss": 0.934550404548645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2664041817188263, "epoch": 0.59, "learning_rate": 4.581572273879966e-05, "loss": 0.5318, "step": 693, "task_loss": 1.5197149515151978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7974572777748108, "epoch": 0.59, "learning_rate": 4.580968482067383e-05, "loss": 0.4612, "step": 694, "task_loss": 0.7451560497283936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4035872220993042, "epoch": 0.59, "learning_rate": 4.5803646902548004e-05, "loss": 0.3658, "step": 695, "task_loss": 1.047471046447754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24698840081691742, "epoch": 0.59, "learning_rate": 4.579760898442218e-05, "loss": 0.3036, "step": 696, "task_loss": 0.605748176574707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43346935510635376, "epoch": 0.59, "learning_rate": 4.5791571066296345e-05, "loss": 0.3711, "step": 697, "task_loss": 0.8229206204414368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.438349187374115, "epoch": 0.59, "learning_rate": 4.578553314817051e-05, "loss": 0.4623, "step": 698, "task_loss": 0.3380439877510071 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31243640184402466, "epoch": 0.59, "learning_rate": 4.5779495230044686e-05, "loss": 0.3853, "step": 699, "task_loss": 0.4344436824321747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46454307436943054, "epoch": 0.59, "learning_rate": 4.577345731191885e-05, "loss": 0.444, "step": 700, "task_loss": 0.5624050498008728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18277767300605774, "epoch": 0.59, "learning_rate": 4.576741939379302e-05, "loss": 0.4415, "step": 701, "task_loss": 0.45513394474983215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21187463402748108, "epoch": 0.59, "learning_rate": 4.5761381475667194e-05, "loss": 0.2791, "step": 702, "task_loss": 0.6887025237083435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27018314599990845, "epoch": 0.59, "learning_rate": 4.575534355754136e-05, "loss": 0.2991, "step": 703, "task_loss": 0.8194954991340637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30422303080558777, "epoch": 0.59, "learning_rate": 4.574930563941553e-05, "loss": 0.3206, "step": 704, "task_loss": 1.6382495164871216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5321090221405029, "epoch": 0.6, "learning_rate": 4.57432677212897e-05, "loss": 0.3869, "step": 705, "task_loss": 1.6544432640075684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2524600625038147, "epoch": 0.6, "learning_rate": 4.573722980316387e-05, "loss": 0.3343, "step": 706, "task_loss": 0.36589089035987854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2441057562828064, "epoch": 0.6, "learning_rate": 4.573119188503804e-05, "loss": 0.3764, "step": 707, "task_loss": 0.4601496160030365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2476460486650467, "epoch": 0.6, "learning_rate": 4.572515396691221e-05, "loss": 0.292, "step": 708, "task_loss": 0.6016090512275696 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21149423718452454, "epoch": 0.6, "learning_rate": 4.571911604878638e-05, "loss": 0.5381, "step": 709, "task_loss": 0.2821265757083893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22485455870628357, "epoch": 0.6, "learning_rate": 4.571307813066055e-05, "loss": 0.3785, "step": 710, "task_loss": 0.7016220092773438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2019926905632019, "epoch": 0.6, "learning_rate": 4.570704021253472e-05, "loss": 0.3841, "step": 711, "task_loss": 1.3649837970733643 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42618387937545776, "epoch": 0.6, "learning_rate": 4.570100229440889e-05, "loss": 0.455, "step": 712, "task_loss": 0.8006240129470825 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22870805859565735, "epoch": 0.6, "learning_rate": 4.569496437628306e-05, "loss": 0.2952, "step": 713, "task_loss": 0.1706569790840149 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33901914954185486, "epoch": 0.6, "learning_rate": 4.568892645815723e-05, "loss": 0.3755, "step": 714, "task_loss": 0.10086208581924438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19824597239494324, "epoch": 0.6, "learning_rate": 4.56828885400314e-05, "loss": 0.2703, "step": 715, "task_loss": 0.4939492642879486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29819124937057495, "epoch": 0.6, "learning_rate": 4.567685062190557e-05, "loss": 0.3263, "step": 716, "task_loss": 0.9665405750274658 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24669227004051208, "epoch": 0.61, "learning_rate": 4.5670812703779736e-05, "loss": 0.4604, "step": 717, "task_loss": 0.7395060658454895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15106871724128723, "epoch": 0.61, "learning_rate": 4.566477478565391e-05, "loss": 0.3035, "step": 718, "task_loss": 0.26799696683883667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22672738134860992, "epoch": 0.61, "learning_rate": 4.565873686752808e-05, "loss": 0.305, "step": 719, "task_loss": 0.8576157093048096 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5736232995986938, "epoch": 0.61, "learning_rate": 4.565269894940225e-05, "loss": 0.517, "step": 720, "task_loss": 1.0136209726333618 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40861254930496216, "epoch": 0.61, "learning_rate": 4.564666103127642e-05, "loss": 0.3475, "step": 721, "task_loss": 0.8161322474479675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44145435094833374, "epoch": 0.61, "learning_rate": 4.5640623113150586e-05, "loss": 0.3303, "step": 722, "task_loss": 0.6067239046096802 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14312711358070374, "epoch": 0.61, "learning_rate": 4.563458519502476e-05, "loss": 0.3156, "step": 723, "task_loss": 0.37298744916915894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44245588779449463, "epoch": 0.61, "learning_rate": 4.562854727689893e-05, "loss": 0.4077, "step": 724, "task_loss": 0.7636491656303406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6131328344345093, "epoch": 0.61, "learning_rate": 4.5622509358773094e-05, "loss": 0.3716, "step": 725, "task_loss": 1.061371922492981 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3987913131713867, "epoch": 0.61, "learning_rate": 4.561647144064727e-05, "loss": 0.4169, "step": 726, "task_loss": 0.6444965600967407 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37525853514671326, "epoch": 0.61, "learning_rate": 4.5610433522521435e-05, "loss": 0.3535, "step": 727, "task_loss": 0.5094666481018066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23235808312892914, "epoch": 0.61, "learning_rate": 4.56043956043956e-05, "loss": 0.3431, "step": 728, "task_loss": 0.591149091720581 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16249266266822815, "epoch": 0.62, "learning_rate": 4.5598357686269776e-05, "loss": 0.3705, "step": 729, "task_loss": 0.6060795783996582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20443367958068848, "epoch": 0.62, "learning_rate": 4.559231976814395e-05, "loss": 0.3387, "step": 730, "task_loss": 0.3445514440536499 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2696666121482849, "epoch": 0.62, "learning_rate": 4.558628185001812e-05, "loss": 0.3281, "step": 731, "task_loss": 0.7198132872581482 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28837472200393677, "epoch": 0.62, "learning_rate": 4.5580243931892284e-05, "loss": 0.3746, "step": 732, "task_loss": 1.1804611682891846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3295768201351166, "epoch": 0.62, "learning_rate": 4.557420601376646e-05, "loss": 0.4039, "step": 733, "task_loss": 1.3205760717391968 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27145543694496155, "epoch": 0.62, "learning_rate": 4.5568168095640626e-05, "loss": 0.3069, "step": 734, "task_loss": 0.5923184156417847 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3135417103767395, "epoch": 0.62, "learning_rate": 4.556213017751479e-05, "loss": 0.4086, "step": 735, "task_loss": 0.43117162585258484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28134819865226746, "epoch": 0.62, "learning_rate": 4.555609225938897e-05, "loss": 0.3104, "step": 736, "task_loss": 0.49730628728866577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4486047625541687, "epoch": 0.62, "learning_rate": 4.5550054341263134e-05, "loss": 0.3291, "step": 737, "task_loss": 0.15193206071853638 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22780045866966248, "epoch": 0.62, "learning_rate": 4.55440164231373e-05, "loss": 0.3593, "step": 738, "task_loss": 0.16805656254291534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2513253092765808, "epoch": 0.62, "learning_rate": 4.5537978505011475e-05, "loss": 0.3323, "step": 739, "task_loss": 0.5934991240501404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22833995521068573, "epoch": 0.63, "learning_rate": 4.553194058688565e-05, "loss": 0.2774, "step": 740, "task_loss": 0.618530809879303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3058266043663025, "epoch": 0.63, "learning_rate": 4.552590266875981e-05, "loss": 0.3478, "step": 741, "task_loss": 0.046308696269989014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29397332668304443, "epoch": 0.63, "learning_rate": 4.5519864750633983e-05, "loss": 0.324, "step": 742, "task_loss": 0.47769293189048767 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37344565987586975, "epoch": 0.63, "learning_rate": 4.551382683250816e-05, "loss": 0.3952, "step": 743, "task_loss": 1.2599339485168457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3123040795326233, "epoch": 0.63, "learning_rate": 4.550778891438232e-05, "loss": 0.3517, "step": 744, "task_loss": 1.3710837364196777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4613143801689148, "epoch": 0.63, "learning_rate": 4.550175099625649e-05, "loss": 0.3388, "step": 745, "task_loss": 0.3595305383205414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19071230292320251, "epoch": 0.63, "learning_rate": 4.5495713078130666e-05, "loss": 0.3515, "step": 746, "task_loss": 0.9351141452789307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1858353614807129, "epoch": 0.63, "learning_rate": 4.548967516000483e-05, "loss": 0.2997, "step": 747, "task_loss": 0.2936874032020569 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37073928117752075, "epoch": 0.63, "learning_rate": 4.5483637241879e-05, "loss": 0.3776, "step": 748, "task_loss": 0.985735297203064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39584609866142273, "epoch": 0.63, "learning_rate": 4.5477599323753174e-05, "loss": 0.3023, "step": 749, "task_loss": 0.2725343108177185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3656730651855469, "epoch": 0.63, "learning_rate": 4.547156140562734e-05, "loss": 0.3882, "step": 750, "task_loss": 0.6669621467590332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4899216592311859, "epoch": 0.63, "learning_rate": 4.546552348750151e-05, "loss": 0.3371, "step": 751, "task_loss": 0.7428222298622131 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2465105801820755, "epoch": 0.64, "learning_rate": 4.545948556937568e-05, "loss": 0.348, "step": 752, "task_loss": 0.40641671419143677 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32330381870269775, "epoch": 0.64, "learning_rate": 4.5453447651249856e-05, "loss": 0.2979, "step": 753, "task_loss": 1.0408015251159668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28674131631851196, "epoch": 0.64, "learning_rate": 4.544740973312402e-05, "loss": 0.3006, "step": 754, "task_loss": 0.6484240889549255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2678007483482361, "epoch": 0.64, "learning_rate": 4.544137181499819e-05, "loss": 0.3234, "step": 755, "task_loss": 0.6351743936538696 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.12141424417495728, "epoch": 0.64, "learning_rate": 4.5435333896872365e-05, "loss": 0.265, "step": 756, "task_loss": 0.5992752313613892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24598108232021332, "epoch": 0.64, "learning_rate": 4.5429295978746525e-05, "loss": 0.4344, "step": 757, "task_loss": 0.36463993787765503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18556594848632812, "epoch": 0.64, "learning_rate": 4.54232580606207e-05, "loss": 0.2636, "step": 758, "task_loss": 0.14679035544395447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15820741653442383, "epoch": 0.64, "learning_rate": 4.541722014249487e-05, "loss": 0.2695, "step": 759, "task_loss": 0.013220874592661858 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33770519495010376, "epoch": 0.64, "learning_rate": 4.541118222436904e-05, "loss": 0.3761, "step": 760, "task_loss": 0.4439811110496521 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18101146817207336, "epoch": 0.64, "learning_rate": 4.540514430624321e-05, "loss": 0.3802, "step": 761, "task_loss": 0.502497136592865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24307934939861298, "epoch": 0.64, "learning_rate": 4.539910638811738e-05, "loss": 0.2357, "step": 762, "task_loss": 1.0100958347320557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2827172875404358, "epoch": 0.64, "learning_rate": 4.539306846999155e-05, "loss": 0.4196, "step": 763, "task_loss": 0.2085815668106079 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3874979317188263, "epoch": 0.65, "learning_rate": 4.5387030551865716e-05, "loss": 0.3727, "step": 764, "task_loss": 0.4113468825817108 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34265443682670593, "epoch": 0.65, "learning_rate": 4.538099263373989e-05, "loss": 0.3238, "step": 765, "task_loss": 0.29426100850105286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18266001343727112, "epoch": 0.65, "learning_rate": 4.537495471561406e-05, "loss": 0.3644, "step": 766, "task_loss": 0.4295598864555359 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25349992513656616, "epoch": 0.65, "learning_rate": 4.5368916797488224e-05, "loss": 0.2717, "step": 767, "task_loss": 0.7366878986358643 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2427140176296234, "epoch": 0.65, "learning_rate": 4.53628788793624e-05, "loss": 0.3033, "step": 768, "task_loss": 0.2117612659931183 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31193143129348755, "epoch": 0.65, "learning_rate": 4.535684096123657e-05, "loss": 0.3336, "step": 769, "task_loss": 1.5904284715652466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3614553213119507, "epoch": 0.65, "learning_rate": 4.535080304311074e-05, "loss": 0.373, "step": 770, "task_loss": 0.5245720744132996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23396113514900208, "epoch": 0.65, "learning_rate": 4.5344765124984906e-05, "loss": 0.3966, "step": 771, "task_loss": 0.3384198546409607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2826487123966217, "epoch": 0.65, "learning_rate": 4.533872720685908e-05, "loss": 0.3482, "step": 772, "task_loss": 0.8049398064613342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2899158000946045, "epoch": 0.65, "learning_rate": 4.533268928873325e-05, "loss": 0.3689, "step": 773, "task_loss": 0.32007095217704773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34210312366485596, "epoch": 0.65, "learning_rate": 4.5326651370607415e-05, "loss": 0.3304, "step": 774, "task_loss": 1.3622010946273804 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23519110679626465, "epoch": 0.65, "learning_rate": 4.532061345248159e-05, "loss": 0.2949, "step": 775, "task_loss": 0.9312596917152405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.351754367351532, "epoch": 0.66, "learning_rate": 4.5314575534355756e-05, "loss": 0.2727, "step": 776, "task_loss": 0.5468899011611938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.180947408080101, "epoch": 0.66, "learning_rate": 4.530853761622992e-05, "loss": 0.2183, "step": 777, "task_loss": 0.0869239792227745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4783400297164917, "epoch": 0.66, "learning_rate": 4.53024996981041e-05, "loss": 0.3296, "step": 778, "task_loss": 0.6275749206542969 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3339013159275055, "epoch": 0.66, "learning_rate": 4.5296461779978264e-05, "loss": 0.3194, "step": 779, "task_loss": 0.4619302749633789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3331663906574249, "epoch": 0.66, "learning_rate": 4.529042386185244e-05, "loss": 0.2648, "step": 780, "task_loss": 0.2219018042087555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18654146790504456, "epoch": 0.66, "learning_rate": 4.5284385943726605e-05, "loss": 0.3174, "step": 781, "task_loss": 0.5415080785751343 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2856631278991699, "epoch": 0.66, "learning_rate": 4.527834802560077e-05, "loss": 0.339, "step": 782, "task_loss": 1.3013662099838257 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22131982445716858, "epoch": 0.66, "learning_rate": 4.5272310107474946e-05, "loss": 0.2747, "step": 783, "task_loss": 0.20025552809238434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3150436580181122, "epoch": 0.66, "learning_rate": 4.5266272189349114e-05, "loss": 0.4531, "step": 784, "task_loss": 0.4217085838317871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3068576455116272, "epoch": 0.66, "learning_rate": 4.526023427122328e-05, "loss": 0.3926, "step": 785, "task_loss": 0.8694249987602234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20825308561325073, "epoch": 0.66, "learning_rate": 4.5254196353097455e-05, "loss": 0.4075, "step": 786, "task_loss": 0.0634683147072792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26817312836647034, "epoch": 0.66, "learning_rate": 4.524815843497162e-05, "loss": 0.3672, "step": 787, "task_loss": 0.5024194121360779 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26155024766921997, "epoch": 0.67, "learning_rate": 4.5242120516845796e-05, "loss": 0.3012, "step": 788, "task_loss": 0.29033157229423523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24371114373207092, "epoch": 0.67, "learning_rate": 4.523608259871996e-05, "loss": 0.3037, "step": 789, "task_loss": 0.4643123149871826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20182812213897705, "epoch": 0.67, "learning_rate": 4.523004468059414e-05, "loss": 0.2759, "step": 790, "task_loss": 1.0971732139587402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2818538248538971, "epoch": 0.67, "learning_rate": 4.5224006762468304e-05, "loss": 0.4353, "step": 791, "task_loss": 0.2589888572692871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27974045276641846, "epoch": 0.67, "learning_rate": 4.521796884434247e-05, "loss": 0.3152, "step": 792, "task_loss": 0.9788235425949097 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40452176332473755, "epoch": 0.67, "learning_rate": 4.5211930926216645e-05, "loss": 0.3712, "step": 793, "task_loss": 0.7325109839439392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27447766065597534, "epoch": 0.67, "learning_rate": 4.520589300809081e-05, "loss": 0.343, "step": 794, "task_loss": 0.6478328108787537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3494625687599182, "epoch": 0.67, "learning_rate": 4.519985508996498e-05, "loss": 0.3927, "step": 795, "task_loss": 0.41533729434013367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18138387799263, "epoch": 0.67, "learning_rate": 4.5193817171839154e-05, "loss": 0.2647, "step": 796, "task_loss": 0.48322778940200806 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3385716378688812, "epoch": 0.67, "learning_rate": 4.518777925371332e-05, "loss": 0.3245, "step": 797, "task_loss": 0.5529921054840088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45837950706481934, "epoch": 0.67, "learning_rate": 4.518174133558749e-05, "loss": 0.3665, "step": 798, "task_loss": 1.170419454574585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.250112384557724, "epoch": 0.67, "learning_rate": 4.517570341746166e-05, "loss": 0.4368, "step": 799, "task_loss": 0.6176565289497375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16649723052978516, "epoch": 0.68, "learning_rate": 4.5169665499335836e-05, "loss": 0.3061, "step": 800, "task_loss": 0.5086045861244202 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20179831981658936, "epoch": 0.68, "learning_rate": 4.5163627581209996e-05, "loss": 0.2442, "step": 801, "task_loss": 0.09176841378211975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38554054498672485, "epoch": 0.68, "learning_rate": 4.515758966308417e-05, "loss": 0.3646, "step": 802, "task_loss": 0.5932177305221558 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30483534932136536, "epoch": 0.68, "learning_rate": 4.5151551744958344e-05, "loss": 0.437, "step": 803, "task_loss": 0.9456331729888916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30818307399749756, "epoch": 0.68, "learning_rate": 4.514551382683251e-05, "loss": 0.2894, "step": 804, "task_loss": 1.2488499879837036 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3145252466201782, "epoch": 0.68, "learning_rate": 4.513947590870668e-05, "loss": 0.3799, "step": 805, "task_loss": 0.7992182970046997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24541282653808594, "epoch": 0.68, "learning_rate": 4.513343799058085e-05, "loss": 0.32, "step": 806, "task_loss": 0.22804608941078186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3519752621650696, "epoch": 0.68, "learning_rate": 4.512740007245502e-05, "loss": 0.322, "step": 807, "task_loss": 0.25990596413612366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4632500410079956, "epoch": 0.68, "learning_rate": 4.512136215432919e-05, "loss": 0.426, "step": 808, "task_loss": 0.9388142824172974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4405101239681244, "epoch": 0.68, "learning_rate": 4.511532423620336e-05, "loss": 0.2835, "step": 809, "task_loss": 0.921435534954071 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3464605510234833, "epoch": 0.68, "learning_rate": 4.5109286318077535e-05, "loss": 0.2468, "step": 810, "task_loss": 0.9209409356117249 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.10707838833332062, "epoch": 0.69, "learning_rate": 4.5103248399951695e-05, "loss": 0.3704, "step": 811, "task_loss": 0.01258013118058443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16764624416828156, "epoch": 0.69, "learning_rate": 4.509721048182587e-05, "loss": 0.2984, "step": 812, "task_loss": 0.0628896951675415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15210193395614624, "epoch": 0.69, "learning_rate": 4.509117256370004e-05, "loss": 0.2296, "step": 813, "task_loss": 0.1468479484319687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41215944290161133, "epoch": 0.69, "learning_rate": 4.5085134645574204e-05, "loss": 0.3346, "step": 814, "task_loss": 0.4282277226448059 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2540086805820465, "epoch": 0.69, "learning_rate": 4.507909672744838e-05, "loss": 0.4271, "step": 815, "task_loss": 0.043058332055807114 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1628030389547348, "epoch": 0.69, "learning_rate": 4.507305880932255e-05, "loss": 0.39, "step": 816, "task_loss": 0.16738420724868774 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21274971961975098, "epoch": 0.69, "learning_rate": 4.506702089119671e-05, "loss": 0.2684, "step": 817, "task_loss": 0.8701009750366211 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.550127387046814, "epoch": 0.69, "learning_rate": 4.5060982973070886e-05, "loss": 0.4048, "step": 818, "task_loss": 0.8876651525497437 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31898829340934753, "epoch": 0.69, "learning_rate": 4.505494505494506e-05, "loss": 0.3421, "step": 819, "task_loss": 0.6312325596809387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21779802441596985, "epoch": 0.69, "learning_rate": 4.504890713681923e-05, "loss": 0.3156, "step": 820, "task_loss": 0.27031469345092773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18510892987251282, "epoch": 0.69, "learning_rate": 4.5042869218693394e-05, "loss": 0.2895, "step": 821, "task_loss": 0.6171860694885254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21389347314834595, "epoch": 0.69, "learning_rate": 4.503683130056757e-05, "loss": 0.2918, "step": 822, "task_loss": 0.8571591377258301 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27689576148986816, "epoch": 0.7, "learning_rate": 4.5030793382441735e-05, "loss": 0.3184, "step": 823, "task_loss": 0.3355971872806549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33154094219207764, "epoch": 0.7, "learning_rate": 4.50247554643159e-05, "loss": 0.2995, "step": 824, "task_loss": 0.6020426750183105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21476851403713226, "epoch": 0.7, "learning_rate": 4.5018717546190076e-05, "loss": 0.2518, "step": 825, "task_loss": 0.39440909028053284 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20731791853904724, "epoch": 0.7, "learning_rate": 4.501267962806425e-05, "loss": 0.2677, "step": 826, "task_loss": 0.5119296312332153 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5100505948066711, "epoch": 0.7, "learning_rate": 4.500664170993841e-05, "loss": 0.3259, "step": 827, "task_loss": 0.9436101913452148 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5131642818450928, "epoch": 0.7, "learning_rate": 4.5000603791812585e-05, "loss": 0.3358, "step": 828, "task_loss": 0.3923644423484802 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3875989615917206, "epoch": 0.7, "learning_rate": 4.499456587368676e-05, "loss": 0.4408, "step": 829, "task_loss": 0.5660097599029541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4496367275714874, "epoch": 0.7, "learning_rate": 4.4988527955560926e-05, "loss": 0.3581, "step": 830, "task_loss": 0.41323772072792053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5072163939476013, "epoch": 0.7, "learning_rate": 4.498249003743509e-05, "loss": 0.4033, "step": 831, "task_loss": 1.2710411548614502 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1818779855966568, "epoch": 0.7, "learning_rate": 4.497645211930927e-05, "loss": 0.2262, "step": 832, "task_loss": 0.4064873456954956 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2969943583011627, "epoch": 0.7, "learning_rate": 4.4970414201183434e-05, "loss": 0.3584, "step": 833, "task_loss": 1.0696207284927368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37884706258773804, "epoch": 0.7, "learning_rate": 4.49643762830576e-05, "loss": 0.2946, "step": 834, "task_loss": 0.3618290424346924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.253044068813324, "epoch": 0.71, "learning_rate": 4.4958338364931775e-05, "loss": 0.2577, "step": 835, "task_loss": 0.45334598422050476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4066677391529083, "epoch": 0.71, "learning_rate": 4.495230044680594e-05, "loss": 0.3217, "step": 836, "task_loss": 0.9497126936912537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3431035280227661, "epoch": 0.71, "learning_rate": 4.494626252868011e-05, "loss": 0.3464, "step": 837, "task_loss": 0.6192494034767151 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2643086314201355, "epoch": 0.71, "learning_rate": 4.4940224610554284e-05, "loss": 0.3515, "step": 838, "task_loss": 1.0006438493728638 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5359170436859131, "epoch": 0.71, "learning_rate": 4.493418669242845e-05, "loss": 0.3932, "step": 839, "task_loss": 0.17423032224178314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45239830017089844, "epoch": 0.71, "learning_rate": 4.4928148774302625e-05, "loss": 0.3296, "step": 840, "task_loss": 0.8025171160697937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3105016052722931, "epoch": 0.71, "learning_rate": 4.492211085617679e-05, "loss": 0.4161, "step": 841, "task_loss": 0.6803241968154907 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6275674104690552, "epoch": 0.71, "learning_rate": 4.4916072938050966e-05, "loss": 0.4825, "step": 842, "task_loss": 0.6989018321037292 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24314916133880615, "epoch": 0.71, "learning_rate": 4.491003501992513e-05, "loss": 0.4014, "step": 843, "task_loss": 0.1815427541732788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21715101599693298, "epoch": 0.71, "learning_rate": 4.49039971017993e-05, "loss": 0.2853, "step": 844, "task_loss": 0.0939592570066452 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43083101511001587, "epoch": 0.71, "learning_rate": 4.4897959183673474e-05, "loss": 0.3325, "step": 845, "task_loss": 0.3811054825782776 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46831777691841125, "epoch": 0.71, "learning_rate": 4.489192126554764e-05, "loss": 0.446, "step": 846, "task_loss": 0.32218414545059204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1746586412191391, "epoch": 0.72, "learning_rate": 4.488588334742181e-05, "loss": 0.2254, "step": 847, "task_loss": 1.381054162979126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18915149569511414, "epoch": 0.72, "learning_rate": 4.487984542929598e-05, "loss": 0.3864, "step": 848, "task_loss": 1.0456067323684692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39994895458221436, "epoch": 0.72, "learning_rate": 4.487380751117015e-05, "loss": 0.3548, "step": 849, "task_loss": 0.6537964344024658 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3476526141166687, "epoch": 0.72, "learning_rate": 4.4867769593044324e-05, "loss": 0.3429, "step": 850, "task_loss": 1.0994079113006592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3505191504955292, "epoch": 0.72, "learning_rate": 4.486173167491849e-05, "loss": 0.4278, "step": 851, "task_loss": 0.9853001832962036 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24659569561481476, "epoch": 0.72, "learning_rate": 4.485569375679266e-05, "loss": 0.3423, "step": 852, "task_loss": 0.3349575698375702 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19954511523246765, "epoch": 0.72, "learning_rate": 4.484965583866683e-05, "loss": 0.3044, "step": 853, "task_loss": 0.3239381015300751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3386889100074768, "epoch": 0.72, "learning_rate": 4.4843617920541e-05, "loss": 0.309, "step": 854, "task_loss": 0.2249189168214798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2926265597343445, "epoch": 0.72, "learning_rate": 4.4837580002415166e-05, "loss": 0.2904, "step": 855, "task_loss": 0.6071061491966248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2602430582046509, "epoch": 0.72, "learning_rate": 4.483154208428934e-05, "loss": 0.2715, "step": 856, "task_loss": 0.07972946017980576 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2713681757450104, "epoch": 0.72, "learning_rate": 4.482550416616351e-05, "loss": 0.4366, "step": 857, "task_loss": 0.47239094972610474 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43284672498703003, "epoch": 0.72, "learning_rate": 4.4819466248037675e-05, "loss": 0.3625, "step": 858, "task_loss": 1.0727410316467285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3162996172904968, "epoch": 0.73, "learning_rate": 4.481342832991185e-05, "loss": 0.2562, "step": 859, "task_loss": 1.070422887802124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24605605006217957, "epoch": 0.73, "learning_rate": 4.480739041178602e-05, "loss": 0.237, "step": 860, "task_loss": 0.3509301543235779 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5116292834281921, "epoch": 0.73, "learning_rate": 4.480135249366019e-05, "loss": 0.412, "step": 861, "task_loss": 0.17709815502166748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26727744936943054, "epoch": 0.73, "learning_rate": 4.479531457553436e-05, "loss": 0.3207, "step": 862, "task_loss": 0.6446810364723206 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5006260871887207, "epoch": 0.73, "learning_rate": 4.478927665740853e-05, "loss": 0.3148, "step": 863, "task_loss": 0.21939678490161896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34987127780914307, "epoch": 0.73, "learning_rate": 4.47832387392827e-05, "loss": 0.3455, "step": 864, "task_loss": 0.5132679343223572 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30705782771110535, "epoch": 0.73, "learning_rate": 4.4777200821156865e-05, "loss": 0.3065, "step": 865, "task_loss": 0.9149826765060425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.11884512007236481, "epoch": 0.73, "learning_rate": 4.477116290303104e-05, "loss": 0.2348, "step": 866, "task_loss": 0.27015990018844604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1960749626159668, "epoch": 0.73, "learning_rate": 4.4765124984905207e-05, "loss": 0.3135, "step": 867, "task_loss": 1.0727053880691528 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15920498967170715, "epoch": 0.73, "learning_rate": 4.4759087066779374e-05, "loss": 0.331, "step": 868, "task_loss": 0.10256748646497726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3017585575580597, "epoch": 0.73, "learning_rate": 4.475304914865355e-05, "loss": 0.3729, "step": 869, "task_loss": 0.6431319713592529 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17750011384487152, "epoch": 0.73, "learning_rate": 4.474701123052772e-05, "loss": 0.3423, "step": 870, "task_loss": 0.4733154773712158 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.164194256067276, "epoch": 0.74, "learning_rate": 4.474097331240188e-05, "loss": 0.2685, "step": 871, "task_loss": 0.9117984175682068 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42855706810951233, "epoch": 0.74, "learning_rate": 4.4734935394276056e-05, "loss": 0.3357, "step": 872, "task_loss": 0.5845022797584534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.11093800514936447, "epoch": 0.74, "learning_rate": 4.472889747615023e-05, "loss": 0.2113, "step": 873, "task_loss": 0.4649612307548523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1649121344089508, "epoch": 0.74, "learning_rate": 4.472285955802439e-05, "loss": 0.2881, "step": 874, "task_loss": 0.48063600063323975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2889198064804077, "epoch": 0.74, "learning_rate": 4.4716821639898564e-05, "loss": 0.2923, "step": 875, "task_loss": 0.600864589214325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46347254514694214, "epoch": 0.74, "learning_rate": 4.471078372177274e-05, "loss": 0.3385, "step": 876, "task_loss": 0.373192697763443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1478535234928131, "epoch": 0.74, "learning_rate": 4.4704745803646905e-05, "loss": 0.2494, "step": 877, "task_loss": 0.7011318802833557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2691228985786438, "epoch": 0.74, "learning_rate": 4.469870788552107e-05, "loss": 0.2829, "step": 878, "task_loss": 0.6395071148872375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40578508377075195, "epoch": 0.74, "learning_rate": 4.469266996739525e-05, "loss": 0.333, "step": 879, "task_loss": 0.5234085321426392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42064720392227173, "epoch": 0.74, "learning_rate": 4.4686632049269414e-05, "loss": 0.3405, "step": 880, "task_loss": 0.7868759632110596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30395185947418213, "epoch": 0.74, "learning_rate": 4.468059413114358e-05, "loss": 0.2518, "step": 881, "task_loss": 0.2462514191865921 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.13588252663612366, "epoch": 0.75, "learning_rate": 4.4674556213017755e-05, "loss": 0.3023, "step": 882, "task_loss": 0.1645929515361786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19684652984142303, "epoch": 0.75, "learning_rate": 4.466851829489193e-05, "loss": 0.2963, "step": 883, "task_loss": 0.696341872215271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3418586254119873, "epoch": 0.75, "learning_rate": 4.466248037676609e-05, "loss": 0.3042, "step": 884, "task_loss": 0.3610026240348816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14820131659507751, "epoch": 0.75, "learning_rate": 4.465644245864026e-05, "loss": 0.2708, "step": 885, "task_loss": 1.6670372486114502 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2551877200603485, "epoch": 0.75, "learning_rate": 4.465040454051444e-05, "loss": 0.2551, "step": 886, "task_loss": 0.7116249203681946 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32418292760849, "epoch": 0.75, "learning_rate": 4.46443666223886e-05, "loss": 0.3433, "step": 887, "task_loss": 0.6139531135559082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25696778297424316, "epoch": 0.75, "learning_rate": 4.463832870426277e-05, "loss": 0.2953, "step": 888, "task_loss": 0.4976051449775696 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28035008907318115, "epoch": 0.75, "learning_rate": 4.4632290786136946e-05, "loss": 0.3449, "step": 889, "task_loss": 0.6069408059120178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2542031407356262, "epoch": 0.75, "learning_rate": 4.4626252868011106e-05, "loss": 0.3238, "step": 890, "task_loss": 0.3564848303794861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30425703525543213, "epoch": 0.75, "learning_rate": 4.462021494988528e-05, "loss": 0.2975, "step": 891, "task_loss": 1.2710225582122803 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1999809294939041, "epoch": 0.75, "learning_rate": 4.4614177031759454e-05, "loss": 0.2465, "step": 892, "task_loss": 0.4991018772125244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20470239222049713, "epoch": 0.75, "learning_rate": 4.460813911363362e-05, "loss": 0.3433, "step": 893, "task_loss": 0.4117075502872467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25381970405578613, "epoch": 0.76, "learning_rate": 4.460210119550779e-05, "loss": 0.3004, "step": 894, "task_loss": 1.39918851852417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21731606125831604, "epoch": 0.76, "learning_rate": 4.459606327738196e-05, "loss": 0.304, "step": 895, "task_loss": 0.1530483216047287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23831906914710999, "epoch": 0.76, "learning_rate": 4.459002535925613e-05, "loss": 0.3481, "step": 896, "task_loss": 0.3927173614501953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23949450254440308, "epoch": 0.76, "learning_rate": 4.4583987441130297e-05, "loss": 0.3068, "step": 897, "task_loss": 1.2088288068771362 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30444851517677307, "epoch": 0.76, "learning_rate": 4.457794952300447e-05, "loss": 0.3504, "step": 898, "task_loss": 0.2169787436723709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32510697841644287, "epoch": 0.76, "learning_rate": 4.4571911604878644e-05, "loss": 0.3373, "step": 899, "task_loss": 1.149895191192627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17038384079933167, "epoch": 0.76, "learning_rate": 4.4565873686752805e-05, "loss": 0.314, "step": 900, "task_loss": 0.24956150352954865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1839144229888916, "epoch": 0.76, "learning_rate": 4.455983576862698e-05, "loss": 0.3675, "step": 901, "task_loss": 1.0437637567520142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46756502985954285, "epoch": 0.76, "learning_rate": 4.455379785050115e-05, "loss": 0.344, "step": 902, "task_loss": 0.43766912817955017 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2931916117668152, "epoch": 0.76, "learning_rate": 4.454775993237532e-05, "loss": 0.3348, "step": 903, "task_loss": 0.8494141697883606 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5187391042709351, "epoch": 0.76, "learning_rate": 4.454172201424949e-05, "loss": 0.4149, "step": 904, "task_loss": 1.1341482400894165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5336723327636719, "epoch": 0.76, "learning_rate": 4.453568409612366e-05, "loss": 0.283, "step": 905, "task_loss": 0.9153821468353271 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2927427887916565, "epoch": 0.77, "learning_rate": 4.452964617799783e-05, "loss": 0.3614, "step": 906, "task_loss": 1.150106430053711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5022847652435303, "epoch": 0.77, "learning_rate": 4.4523608259871996e-05, "loss": 0.3949, "step": 907, "task_loss": 0.5168099999427795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4198801517486572, "epoch": 0.77, "learning_rate": 4.451757034174617e-05, "loss": 0.3579, "step": 908, "task_loss": 1.7082383632659912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3057956397533417, "epoch": 0.77, "learning_rate": 4.451153242362034e-05, "loss": 0.3653, "step": 909, "task_loss": 0.6539680361747742 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2797132134437561, "epoch": 0.77, "learning_rate": 4.4505494505494504e-05, "loss": 0.3337, "step": 910, "task_loss": 0.6889963150024414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29004788398742676, "epoch": 0.77, "learning_rate": 4.449945658736868e-05, "loss": 0.2934, "step": 911, "task_loss": 0.1668662130832672 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30916184186935425, "epoch": 0.77, "learning_rate": 4.4493418669242845e-05, "loss": 0.3195, "step": 912, "task_loss": 0.4844413995742798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48047077655792236, "epoch": 0.77, "learning_rate": 4.448738075111702e-05, "loss": 0.3777, "step": 913, "task_loss": 1.37309992313385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2226959466934204, "epoch": 0.77, "learning_rate": 4.4481342832991186e-05, "loss": 0.3126, "step": 914, "task_loss": 1.0729975700378418 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33201831579208374, "epoch": 0.77, "learning_rate": 4.447530491486535e-05, "loss": 0.2802, "step": 915, "task_loss": 0.8840923309326172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17303964495658875, "epoch": 0.77, "learning_rate": 4.446926699673953e-05, "loss": 0.2859, "step": 916, "task_loss": 0.5963447093963623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27506813406944275, "epoch": 0.77, "learning_rate": 4.4463229078613694e-05, "loss": 0.3101, "step": 917, "task_loss": 1.2725977897644043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3568727970123291, "epoch": 0.78, "learning_rate": 4.445719116048787e-05, "loss": 0.3362, "step": 918, "task_loss": 0.5199917554855347 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2657959461212158, "epoch": 0.78, "learning_rate": 4.4451153242362036e-05, "loss": 0.3018, "step": 919, "task_loss": 1.0269718170166016 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5243659019470215, "epoch": 0.78, "learning_rate": 4.44451153242362e-05, "loss": 0.3201, "step": 920, "task_loss": 1.1536678075790405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2664206027984619, "epoch": 0.78, "learning_rate": 4.443907740611038e-05, "loss": 0.2609, "step": 921, "task_loss": 0.5250079035758972 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2522469460964203, "epoch": 0.78, "learning_rate": 4.4433039487984544e-05, "loss": 0.3, "step": 922, "task_loss": 0.3302900791168213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19866514205932617, "epoch": 0.78, "learning_rate": 4.442700156985872e-05, "loss": 0.3272, "step": 923, "task_loss": 1.0624237060546875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19574545323848724, "epoch": 0.78, "learning_rate": 4.4420963651732885e-05, "loss": 0.3473, "step": 924, "task_loss": 1.3533955812454224 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5235202312469482, "epoch": 0.78, "learning_rate": 4.441492573360705e-05, "loss": 0.4736, "step": 925, "task_loss": 1.3145147562026978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23668259382247925, "epoch": 0.78, "learning_rate": 4.4408887815481226e-05, "loss": 0.2459, "step": 926, "task_loss": 0.5487642884254456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36069899797439575, "epoch": 0.78, "learning_rate": 4.440284989735539e-05, "loss": 0.2124, "step": 927, "task_loss": 0.13465382158756256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4356892704963684, "epoch": 0.78, "learning_rate": 4.439681197922956e-05, "loss": 0.4098, "step": 928, "task_loss": 0.3588753342628479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5197922587394714, "epoch": 0.78, "learning_rate": 4.4390774061103735e-05, "loss": 0.3846, "step": 929, "task_loss": 0.8379908800125122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2317069172859192, "epoch": 0.79, "learning_rate": 4.43847361429779e-05, "loss": 0.3911, "step": 930, "task_loss": 1.5306708812713623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30233681201934814, "epoch": 0.79, "learning_rate": 4.437869822485207e-05, "loss": 0.3162, "step": 931, "task_loss": 0.8812251687049866 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16705961525440216, "epoch": 0.79, "learning_rate": 4.437266030672624e-05, "loss": 0.2586, "step": 932, "task_loss": 0.20003189146518707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4740631878376007, "epoch": 0.79, "learning_rate": 4.436662238860042e-05, "loss": 0.3369, "step": 933, "task_loss": 1.3663749694824219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.375772088766098, "epoch": 0.79, "learning_rate": 4.4360584470474584e-05, "loss": 0.369, "step": 934, "task_loss": 0.583289384841919 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4133703410625458, "epoch": 0.79, "learning_rate": 4.435454655234875e-05, "loss": 0.3368, "step": 935, "task_loss": 0.3173907697200775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22911988198757172, "epoch": 0.79, "learning_rate": 4.4348508634222925e-05, "loss": 0.3515, "step": 936, "task_loss": 0.13677309453487396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.196448415517807, "epoch": 0.79, "learning_rate": 4.434247071609709e-05, "loss": 0.4182, "step": 937, "task_loss": 0.3450338840484619 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41565096378326416, "epoch": 0.79, "learning_rate": 4.433643279797126e-05, "loss": 0.403, "step": 938, "task_loss": 0.7415960431098938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.241883784532547, "epoch": 0.79, "learning_rate": 4.4330394879845433e-05, "loss": 0.385, "step": 939, "task_loss": 0.30381277203559875 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3282340168952942, "epoch": 0.79, "learning_rate": 4.43243569617196e-05, "loss": 0.3275, "step": 940, "task_loss": 0.9947277903556824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3644707500934601, "epoch": 0.79, "learning_rate": 4.431831904359377e-05, "loss": 0.2945, "step": 941, "task_loss": 0.7145079970359802 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3223477303981781, "epoch": 0.8, "learning_rate": 4.431228112546794e-05, "loss": 0.2993, "step": 942, "task_loss": 0.41398316621780396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3000563979148865, "epoch": 0.8, "learning_rate": 4.4306243207342116e-05, "loss": 0.2881, "step": 943, "task_loss": 0.22363801300525665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3299402892589569, "epoch": 0.8, "learning_rate": 4.4300205289216276e-05, "loss": 0.3253, "step": 944, "task_loss": 0.5077597498893738 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2596122622489929, "epoch": 0.8, "learning_rate": 4.429416737109045e-05, "loss": 0.3087, "step": 945, "task_loss": 0.423097163438797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25506094098091125, "epoch": 0.8, "learning_rate": 4.4288129452964624e-05, "loss": 0.2447, "step": 946, "task_loss": 0.4598116874694824 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27282679080963135, "epoch": 0.8, "learning_rate": 4.4282091534838784e-05, "loss": 0.267, "step": 947, "task_loss": 0.40855786204338074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2465524524450302, "epoch": 0.8, "learning_rate": 4.427605361671296e-05, "loss": 0.3157, "step": 948, "task_loss": 1.4026042222976685 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2175057828426361, "epoch": 0.8, "learning_rate": 4.427001569858713e-05, "loss": 0.3274, "step": 949, "task_loss": 0.42884060740470886 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15002313256263733, "epoch": 0.8, "learning_rate": 4.42639777804613e-05, "loss": 0.2565, "step": 950, "task_loss": 0.698137640953064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24660664796829224, "epoch": 0.8, "learning_rate": 4.425793986233547e-05, "loss": 0.292, "step": 951, "task_loss": 0.5662290453910828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3446224331855774, "epoch": 0.8, "learning_rate": 4.425190194420964e-05, "loss": 0.3067, "step": 952, "task_loss": 0.48581963777542114 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22411227226257324, "epoch": 0.81, "learning_rate": 4.424586402608381e-05, "loss": 0.3351, "step": 953, "task_loss": 0.3839857578277588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3203333020210266, "epoch": 0.81, "learning_rate": 4.4239826107957975e-05, "loss": 0.3357, "step": 954, "task_loss": 0.9429383277893066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2740730345249176, "epoch": 0.81, "learning_rate": 4.423378818983215e-05, "loss": 0.3464, "step": 955, "task_loss": 0.3803875744342804 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18013200163841248, "epoch": 0.81, "learning_rate": 4.422775027170632e-05, "loss": 0.2771, "step": 956, "task_loss": 0.037426676601171494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2157067507505417, "epoch": 0.81, "learning_rate": 4.4221712353580483e-05, "loss": 0.2912, "step": 957, "task_loss": 0.7635881900787354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38518476486206055, "epoch": 0.81, "learning_rate": 4.421567443545466e-05, "loss": 0.3426, "step": 958, "task_loss": 0.7743237018585205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25463634729385376, "epoch": 0.81, "learning_rate": 4.420963651732883e-05, "loss": 0.4342, "step": 959, "task_loss": 0.4945194721221924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36092615127563477, "epoch": 0.81, "learning_rate": 4.420359859920299e-05, "loss": 0.3765, "step": 960, "task_loss": 0.5233352184295654 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2952161431312561, "epoch": 0.81, "learning_rate": 4.4197560681077166e-05, "loss": 0.3502, "step": 961, "task_loss": 0.05033006891608238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3725583553314209, "epoch": 0.81, "learning_rate": 4.419152276295134e-05, "loss": 0.3647, "step": 962, "task_loss": 0.49692144989967346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2846097946166992, "epoch": 0.81, "learning_rate": 4.418548484482551e-05, "loss": 0.4061, "step": 963, "task_loss": 1.0440422296524048 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27408260107040405, "epoch": 0.81, "learning_rate": 4.4179446926699674e-05, "loss": 0.3303, "step": 964, "task_loss": 0.1104651391506195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39300885796546936, "epoch": 0.82, "learning_rate": 4.417340900857385e-05, "loss": 0.3554, "step": 965, "task_loss": 0.3712834417819977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3396458923816681, "epoch": 0.82, "learning_rate": 4.4167371090448015e-05, "loss": 0.3575, "step": 966, "task_loss": 1.6036909818649292 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2086452692747116, "epoch": 0.82, "learning_rate": 4.416133317232218e-05, "loss": 0.3655, "step": 967, "task_loss": 0.4195214807987213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15584062039852142, "epoch": 0.82, "learning_rate": 4.4155295254196356e-05, "loss": 0.304, "step": 968, "task_loss": 0.32137513160705566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3015002906322479, "epoch": 0.82, "learning_rate": 4.4149257336070523e-05, "loss": 0.3427, "step": 969, "task_loss": 1.1579301357269287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2543022632598877, "epoch": 0.82, "learning_rate": 4.414321941794469e-05, "loss": 0.285, "step": 970, "task_loss": 0.47699981927871704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2944380044937134, "epoch": 0.82, "learning_rate": 4.4137181499818865e-05, "loss": 0.358, "step": 971, "task_loss": 0.5717900395393372 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26408302783966064, "epoch": 0.82, "learning_rate": 4.413114358169304e-05, "loss": 0.2635, "step": 972, "task_loss": 0.1646224409341812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4564114212989807, "epoch": 0.82, "learning_rate": 4.4125105663567206e-05, "loss": 0.3039, "step": 973, "task_loss": 0.5078140497207642 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20683130621910095, "epoch": 0.82, "learning_rate": 4.411906774544137e-05, "loss": 0.316, "step": 974, "task_loss": 0.19483999907970428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2430845946073532, "epoch": 0.82, "learning_rate": 4.411302982731555e-05, "loss": 0.3225, "step": 975, "task_loss": 0.42443209886550903 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21450918912887573, "epoch": 0.82, "learning_rate": 4.4106991909189714e-05, "loss": 0.4208, "step": 976, "task_loss": 0.5877259373664856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3073863387107849, "epoch": 0.83, "learning_rate": 4.410095399106388e-05, "loss": 0.3333, "step": 977, "task_loss": 1.2696553468704224 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21301056444644928, "epoch": 0.83, "learning_rate": 4.4094916072938055e-05, "loss": 0.3137, "step": 978, "task_loss": 0.5221570730209351 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3419423997402191, "epoch": 0.83, "learning_rate": 4.408887815481222e-05, "loss": 0.3735, "step": 979, "task_loss": 0.10681381821632385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2353341281414032, "epoch": 0.83, "learning_rate": 4.408284023668639e-05, "loss": 0.2944, "step": 980, "task_loss": 1.1870335340499878 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2248719334602356, "epoch": 0.83, "learning_rate": 4.4076802318560564e-05, "loss": 0.3282, "step": 981, "task_loss": 1.460014820098877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45160430669784546, "epoch": 0.83, "learning_rate": 4.407076440043473e-05, "loss": 0.3221, "step": 982, "task_loss": 0.32142579555511475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34242257475852966, "epoch": 0.83, "learning_rate": 4.4064726482308905e-05, "loss": 0.2983, "step": 983, "task_loss": 0.5037776827812195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1728317141532898, "epoch": 0.83, "learning_rate": 4.405868856418307e-05, "loss": 0.4662, "step": 984, "task_loss": 0.9488698244094849 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2451724410057068, "epoch": 0.83, "learning_rate": 4.405265064605724e-05, "loss": 0.2674, "step": 985, "task_loss": 1.4671635627746582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3261406719684601, "epoch": 0.83, "learning_rate": 4.404661272793141e-05, "loss": 0.317, "step": 986, "task_loss": 1.112964153289795 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3129112124443054, "epoch": 0.83, "learning_rate": 4.404057480980558e-05, "loss": 0.3609, "step": 987, "task_loss": 0.9495210647583008 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2972010374069214, "epoch": 0.83, "learning_rate": 4.403453689167975e-05, "loss": 0.3262, "step": 988, "task_loss": 0.5362968444824219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4374579191207886, "epoch": 0.84, "learning_rate": 4.402849897355392e-05, "loss": 0.3438, "step": 989, "task_loss": 1.0089572668075562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33846473693847656, "epoch": 0.84, "learning_rate": 4.402246105542809e-05, "loss": 0.2915, "step": 990, "task_loss": 1.0764498710632324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.220596581697464, "epoch": 0.84, "learning_rate": 4.401642313730226e-05, "loss": 0.3072, "step": 991, "task_loss": 0.6565892696380615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32423368096351624, "epoch": 0.84, "learning_rate": 4.401038521917643e-05, "loss": 0.2883, "step": 992, "task_loss": 1.0279080867767334 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28588879108428955, "epoch": 0.84, "learning_rate": 4.4004347301050604e-05, "loss": 0.2894, "step": 993, "task_loss": 0.20756539702415466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3498670160770416, "epoch": 0.84, "learning_rate": 4.399830938292477e-05, "loss": 0.3086, "step": 994, "task_loss": 0.9307231903076172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38636162877082825, "epoch": 0.84, "learning_rate": 4.399227146479894e-05, "loss": 0.3729, "step": 995, "task_loss": 0.3652908205986023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2958947420120239, "epoch": 0.84, "learning_rate": 4.398623354667311e-05, "loss": 0.2632, "step": 996, "task_loss": 0.21556514501571655 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2821813225746155, "epoch": 0.84, "learning_rate": 4.398019562854728e-05, "loss": 0.3607, "step": 997, "task_loss": 0.9080299139022827 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27574506402015686, "epoch": 0.84, "learning_rate": 4.3974157710421446e-05, "loss": 0.349, "step": 998, "task_loss": 1.4849597215652466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32307714223861694, "epoch": 0.84, "learning_rate": 4.396811979229562e-05, "loss": 0.3933, "step": 999, "task_loss": 1.157652497291565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4837101697921753, "epoch": 0.84, "learning_rate": 4.396208187416979e-05, "loss": 0.3183, "step": 1000, "task_loss": 0.6930521130561829 }, { "epoch": 0.84, "eval_accuracy": 0.9144158415841585, "eval_loss": 0.1866626739501953, "eval_runtime": 338.8647, "eval_samples_per_second": 74.514, "eval_steps_per_second": 0.584, "step": 1000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20211786031723022, "epoch": 0.85, "learning_rate": 4.3956043956043955e-05, "loss": 0.2746, "step": 1001, "task_loss": 0.37245434522628784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14325755834579468, "epoch": 0.85, "learning_rate": 4.395000603791813e-05, "loss": 0.2348, "step": 1002, "task_loss": 0.4337844252586365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1996465027332306, "epoch": 0.85, "learning_rate": 4.39439681197923e-05, "loss": 0.3155, "step": 1003, "task_loss": 0.15168380737304688 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29522231221199036, "epoch": 0.85, "learning_rate": 4.393793020166646e-05, "loss": 0.4092, "step": 1004, "task_loss": 0.5470403432846069 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.9443536996841431, "epoch": 0.85, "learning_rate": 4.393189228354064e-05, "loss": 0.4567, "step": 1005, "task_loss": 1.8830361366271973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49368518590927124, "epoch": 0.85, "learning_rate": 4.392585436541481e-05, "loss": 0.3289, "step": 1006, "task_loss": 0.09269459545612335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20181068778038025, "epoch": 0.85, "learning_rate": 4.391981644728898e-05, "loss": 0.2896, "step": 1007, "task_loss": 0.27710384130477905 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3943747580051422, "epoch": 0.85, "learning_rate": 4.3913778529163145e-05, "loss": 0.3982, "step": 1008, "task_loss": 0.7358403205871582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38803327083587646, "epoch": 0.85, "learning_rate": 4.390774061103732e-05, "loss": 0.305, "step": 1009, "task_loss": 1.1953283548355103 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3378750681877136, "epoch": 0.85, "learning_rate": 4.3901702692911486e-05, "loss": 0.4178, "step": 1010, "task_loss": 1.432046890258789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5088300704956055, "epoch": 0.85, "learning_rate": 4.3895664774785654e-05, "loss": 0.3755, "step": 1011, "task_loss": 0.8369073271751404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3163774013519287, "epoch": 0.85, "learning_rate": 4.388962685665983e-05, "loss": 0.3321, "step": 1012, "task_loss": 0.30643585324287415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.11590738594532013, "epoch": 0.86, "learning_rate": 4.3883588938534e-05, "loss": 0.2348, "step": 1013, "task_loss": 0.04274863377213478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21037417650222778, "epoch": 0.86, "learning_rate": 4.387755102040816e-05, "loss": 0.3084, "step": 1014, "task_loss": 0.15180319547653198 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1809810847043991, "epoch": 0.86, "learning_rate": 4.3871513102282336e-05, "loss": 0.2583, "step": 1015, "task_loss": 0.919890820980072 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30239373445510864, "epoch": 0.86, "learning_rate": 4.386547518415651e-05, "loss": 0.3781, "step": 1016, "task_loss": 0.5444217920303345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16519245505332947, "epoch": 0.86, "learning_rate": 4.385943726603067e-05, "loss": 0.3502, "step": 1017, "task_loss": 0.12372271716594696 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21786856651306152, "epoch": 0.86, "learning_rate": 4.3853399347904844e-05, "loss": 0.2769, "step": 1018, "task_loss": 0.7423974275588989 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3422611653804779, "epoch": 0.86, "learning_rate": 4.384736142977902e-05, "loss": 0.4249, "step": 1019, "task_loss": 0.4619762897491455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2182973176240921, "epoch": 0.86, "learning_rate": 4.384132351165318e-05, "loss": 0.2564, "step": 1020, "task_loss": 0.7951963543891907 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.151831716299057, "epoch": 0.86, "learning_rate": 4.383528559352735e-05, "loss": 0.4157, "step": 1021, "task_loss": 0.04336428642272949 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1442113220691681, "epoch": 0.86, "learning_rate": 4.3829247675401526e-05, "loss": 0.3081, "step": 1022, "task_loss": 0.010821145959198475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3111485540866852, "epoch": 0.86, "learning_rate": 4.3823209757275694e-05, "loss": 0.2749, "step": 1023, "task_loss": 0.8378331065177917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18909099698066711, "epoch": 0.87, "learning_rate": 4.381717183914986e-05, "loss": 0.2794, "step": 1024, "task_loss": 0.13452747464179993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2863258719444275, "epoch": 0.87, "learning_rate": 4.3811133921024035e-05, "loss": 0.3628, "step": 1025, "task_loss": 0.5825551152229309 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2623407542705536, "epoch": 0.87, "learning_rate": 4.38050960028982e-05, "loss": 0.2995, "step": 1026, "task_loss": 0.07437510788440704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2909240424633026, "epoch": 0.87, "learning_rate": 4.379905808477237e-05, "loss": 0.2935, "step": 1027, "task_loss": 0.3604443073272705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32045578956604004, "epoch": 0.87, "learning_rate": 4.379302016664654e-05, "loss": 0.3636, "step": 1028, "task_loss": 0.5377231240272522 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1561213582754135, "epoch": 0.87, "learning_rate": 4.378698224852072e-05, "loss": 0.3283, "step": 1029, "task_loss": 0.5916603803634644 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49331194162368774, "epoch": 0.87, "learning_rate": 4.378094433039488e-05, "loss": 0.3582, "step": 1030, "task_loss": 0.8987064361572266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3306241035461426, "epoch": 0.87, "learning_rate": 4.377490641226905e-05, "loss": 0.354, "step": 1031, "task_loss": 0.23180772364139557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3254731595516205, "epoch": 0.87, "learning_rate": 4.3768868494143225e-05, "loss": 0.2792, "step": 1032, "task_loss": 0.1383245885372162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1822787970304489, "epoch": 0.87, "learning_rate": 4.376283057601739e-05, "loss": 0.4297, "step": 1033, "task_loss": 0.09531703591346741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2677638530731201, "epoch": 0.87, "learning_rate": 4.375679265789156e-05, "loss": 0.3075, "step": 1034, "task_loss": 0.6562163829803467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20685975253582, "epoch": 0.87, "learning_rate": 4.3750754739765734e-05, "loss": 0.2237, "step": 1035, "task_loss": 0.6161696314811707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4285741150379181, "epoch": 0.88, "learning_rate": 4.37447168216399e-05, "loss": 0.3862, "step": 1036, "task_loss": 0.4181616008281708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28285080194473267, "epoch": 0.88, "learning_rate": 4.373867890351407e-05, "loss": 0.471, "step": 1037, "task_loss": 1.4052841663360596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24739590287208557, "epoch": 0.88, "learning_rate": 4.373264098538824e-05, "loss": 0.3541, "step": 1038, "task_loss": 0.23796343803405762 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40053582191467285, "epoch": 0.88, "learning_rate": 4.372660306726241e-05, "loss": 0.3829, "step": 1039, "task_loss": 0.7183547019958496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1861676275730133, "epoch": 0.88, "learning_rate": 4.3720565149136576e-05, "loss": 0.3873, "step": 1040, "task_loss": 0.8843746781349182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25143784284591675, "epoch": 0.88, "learning_rate": 4.371452723101075e-05, "loss": 0.4188, "step": 1041, "task_loss": 0.32889324426651 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21560464799404144, "epoch": 0.88, "learning_rate": 4.370848931288492e-05, "loss": 0.2681, "step": 1042, "task_loss": 0.7099791765213013 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23275157809257507, "epoch": 0.88, "learning_rate": 4.370245139475909e-05, "loss": 0.2865, "step": 1043, "task_loss": 0.36161231994628906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3409966826438904, "epoch": 0.88, "learning_rate": 4.369641347663326e-05, "loss": 0.2789, "step": 1044, "task_loss": 0.6682897210121155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1551985889673233, "epoch": 0.88, "learning_rate": 4.3690375558507426e-05, "loss": 0.2918, "step": 1045, "task_loss": 0.6977567076683044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4235483705997467, "epoch": 0.88, "learning_rate": 4.36843376403816e-05, "loss": 0.3734, "step": 1046, "task_loss": 0.9238823652267456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45389509201049805, "epoch": 0.88, "learning_rate": 4.367829972225577e-05, "loss": 0.39, "step": 1047, "task_loss": 0.15337498486042023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23572328686714172, "epoch": 0.89, "learning_rate": 4.367226180412994e-05, "loss": 0.3884, "step": 1048, "task_loss": 0.4750136137008667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20941177010536194, "epoch": 0.89, "learning_rate": 4.366622388600411e-05, "loss": 0.3282, "step": 1049, "task_loss": 0.59443199634552 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3287932276725769, "epoch": 0.89, "learning_rate": 4.3660185967878275e-05, "loss": 0.3408, "step": 1050, "task_loss": 0.5581884980201721 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23965969681739807, "epoch": 0.89, "learning_rate": 4.365414804975245e-05, "loss": 0.3065, "step": 1051, "task_loss": 0.49103525280952454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23221686482429504, "epoch": 0.89, "learning_rate": 4.3648110131626617e-05, "loss": 0.2656, "step": 1052, "task_loss": 0.1033593937754631 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5119155049324036, "epoch": 0.89, "learning_rate": 4.364207221350079e-05, "loss": 0.4183, "step": 1053, "task_loss": 0.9471501111984253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49145814776420593, "epoch": 0.89, "learning_rate": 4.363603429537496e-05, "loss": 0.3626, "step": 1054, "task_loss": 0.9312189221382141 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.10522010922431946, "epoch": 0.89, "learning_rate": 4.3629996377249125e-05, "loss": 0.2465, "step": 1055, "task_loss": 0.09354358911514282 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41132575273513794, "epoch": 0.89, "learning_rate": 4.36239584591233e-05, "loss": 0.3323, "step": 1056, "task_loss": 0.7960087656974792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7278478145599365, "epoch": 0.89, "learning_rate": 4.3617920540997466e-05, "loss": 0.3632, "step": 1057, "task_loss": 0.714910089969635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2962322533130646, "epoch": 0.89, "learning_rate": 4.361188262287163e-05, "loss": 0.3442, "step": 1058, "task_loss": 1.0103408098220825 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17133769392967224, "epoch": 0.89, "learning_rate": 4.360584470474581e-05, "loss": 0.3008, "step": 1059, "task_loss": 0.18613861501216888 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3180844485759735, "epoch": 0.9, "learning_rate": 4.3599806786619974e-05, "loss": 0.4371, "step": 1060, "task_loss": 1.3728989362716675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2493615746498108, "epoch": 0.9, "learning_rate": 4.359376886849414e-05, "loss": 0.3169, "step": 1061, "task_loss": 0.8047768473625183 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32007667422294617, "epoch": 0.9, "learning_rate": 4.3587730950368315e-05, "loss": 0.343, "step": 1062, "task_loss": 0.8326956629753113 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26670730113983154, "epoch": 0.9, "learning_rate": 4.358169303224248e-05, "loss": 0.2703, "step": 1063, "task_loss": 0.3882765471935272 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5862593054771423, "epoch": 0.9, "learning_rate": 4.3575655114116657e-05, "loss": 0.4138, "step": 1064, "task_loss": 1.2722101211547852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46131211519241333, "epoch": 0.9, "learning_rate": 4.3569617195990824e-05, "loss": 0.4005, "step": 1065, "task_loss": 0.7884384989738464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6409000158309937, "epoch": 0.9, "learning_rate": 4.3563579277865e-05, "loss": 0.4768, "step": 1066, "task_loss": 0.3153364956378937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3720138370990753, "epoch": 0.9, "learning_rate": 4.3557541359739165e-05, "loss": 0.3028, "step": 1067, "task_loss": 0.44241032004356384 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40596243739128113, "epoch": 0.9, "learning_rate": 4.355150344161333e-05, "loss": 0.3424, "step": 1068, "task_loss": 0.8774933815002441 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.281246542930603, "epoch": 0.9, "learning_rate": 4.3545465523487506e-05, "loss": 0.2854, "step": 1069, "task_loss": 0.5583416223526001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22644507884979248, "epoch": 0.9, "learning_rate": 4.353942760536167e-05, "loss": 0.3532, "step": 1070, "task_loss": 0.7667713165283203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27582958340644836, "epoch": 0.9, "learning_rate": 4.353338968723584e-05, "loss": 0.2971, "step": 1071, "task_loss": 0.23659303784370422 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7315882444381714, "epoch": 0.91, "learning_rate": 4.3527351769110014e-05, "loss": 0.452, "step": 1072, "task_loss": 1.600015640258789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30030736327171326, "epoch": 0.91, "learning_rate": 4.352131385098418e-05, "loss": 0.2688, "step": 1073, "task_loss": 0.24636352062225342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6928387880325317, "epoch": 0.91, "learning_rate": 4.351527593285835e-05, "loss": 0.3641, "step": 1074, "task_loss": 0.560386598110199 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2884763479232788, "epoch": 0.91, "learning_rate": 4.350923801473252e-05, "loss": 0.2604, "step": 1075, "task_loss": 0.44039300084114075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2866283059120178, "epoch": 0.91, "learning_rate": 4.35032000966067e-05, "loss": 0.4177, "step": 1076, "task_loss": 0.3292704224586487 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26677486300468445, "epoch": 0.91, "learning_rate": 4.349716217848086e-05, "loss": 0.3373, "step": 1077, "task_loss": 1.0251530408859253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17510847747325897, "epoch": 0.91, "learning_rate": 4.349112426035503e-05, "loss": 0.2605, "step": 1078, "task_loss": 0.4759744107723236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25535091757774353, "epoch": 0.91, "learning_rate": 4.3485086342229205e-05, "loss": 0.2865, "step": 1079, "task_loss": 0.4177473187446594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3829230070114136, "epoch": 0.91, "learning_rate": 4.347904842410337e-05, "loss": 0.3933, "step": 1080, "task_loss": 0.397390753030777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4474875330924988, "epoch": 0.91, "learning_rate": 4.347301050597754e-05, "loss": 0.4057, "step": 1081, "task_loss": 1.24114191532135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30222591757774353, "epoch": 0.91, "learning_rate": 4.346697258785171e-05, "loss": 0.3347, "step": 1082, "task_loss": 1.0539515018463135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2338535189628601, "epoch": 0.91, "learning_rate": 4.346093466972588e-05, "loss": 0.2787, "step": 1083, "task_loss": 0.3205210268497467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2954961657524109, "epoch": 0.92, "learning_rate": 4.345489675160005e-05, "loss": 0.4183, "step": 1084, "task_loss": 0.3001650869846344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23438599705696106, "epoch": 0.92, "learning_rate": 4.344885883347422e-05, "loss": 0.3048, "step": 1085, "task_loss": 0.8028302192687988 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2786942422389984, "epoch": 0.92, "learning_rate": 4.3442820915348396e-05, "loss": 0.3152, "step": 1086, "task_loss": 0.5790148973464966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3665401041507721, "epoch": 0.92, "learning_rate": 4.3436782997222556e-05, "loss": 0.3724, "step": 1087, "task_loss": 0.3731836974620819 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19855603575706482, "epoch": 0.92, "learning_rate": 4.343074507909673e-05, "loss": 0.3621, "step": 1088, "task_loss": 0.23326639831066132 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3520207405090332, "epoch": 0.92, "learning_rate": 4.3424707160970904e-05, "loss": 0.3503, "step": 1089, "task_loss": 0.35438090562820435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28581809997558594, "epoch": 0.92, "learning_rate": 4.3418669242845064e-05, "loss": 0.2611, "step": 1090, "task_loss": 0.6348535418510437 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22808696329593658, "epoch": 0.92, "learning_rate": 4.341263132471924e-05, "loss": 0.3776, "step": 1091, "task_loss": 1.3461005687713623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20256385207176208, "epoch": 0.92, "learning_rate": 4.340659340659341e-05, "loss": 0.3096, "step": 1092, "task_loss": 0.5651939511299133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45031237602233887, "epoch": 0.92, "learning_rate": 4.340055548846757e-05, "loss": 0.3785, "step": 1093, "task_loss": 0.8704009056091309 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6844884157180786, "epoch": 0.92, "learning_rate": 4.339451757034175e-05, "loss": 0.4401, "step": 1094, "task_loss": 0.6413459181785583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2970822751522064, "epoch": 0.93, "learning_rate": 4.338847965221592e-05, "loss": 0.2697, "step": 1095, "task_loss": 0.40856292843818665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2479051649570465, "epoch": 0.93, "learning_rate": 4.338244173409009e-05, "loss": 0.2841, "step": 1096, "task_loss": 0.19519725441932678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39759886264801025, "epoch": 0.93, "learning_rate": 4.3376403815964255e-05, "loss": 0.39, "step": 1097, "task_loss": 0.7304579019546509 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49157416820526123, "epoch": 0.93, "learning_rate": 4.337036589783843e-05, "loss": 0.391, "step": 1098, "task_loss": 0.9423661828041077 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2870109975337982, "epoch": 0.93, "learning_rate": 4.3364327979712596e-05, "loss": 0.2564, "step": 1099, "task_loss": 0.8456416130065918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29677945375442505, "epoch": 0.93, "learning_rate": 4.335829006158676e-05, "loss": 0.2936, "step": 1100, "task_loss": 0.43353864550590515 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14345046877861023, "epoch": 0.93, "learning_rate": 4.335225214346094e-05, "loss": 0.3758, "step": 1101, "task_loss": 0.25398945808410645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1733161211013794, "epoch": 0.93, "learning_rate": 4.3346214225335104e-05, "loss": 0.2603, "step": 1102, "task_loss": 0.4628955125808716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20866712927818298, "epoch": 0.93, "learning_rate": 4.334017630720927e-05, "loss": 0.2143, "step": 1103, "task_loss": 0.03302335366606712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3210042715072632, "epoch": 0.93, "learning_rate": 4.3334138389083446e-05, "loss": 0.2666, "step": 1104, "task_loss": 0.7447173595428467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2237682193517685, "epoch": 0.93, "learning_rate": 4.332810047095762e-05, "loss": 0.3293, "step": 1105, "task_loss": 0.8289480805397034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.362856388092041, "epoch": 0.93, "learning_rate": 4.332206255283179e-05, "loss": 0.3326, "step": 1106, "task_loss": 0.6049726605415344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23949892818927765, "epoch": 0.94, "learning_rate": 4.3316024634705954e-05, "loss": 0.3116, "step": 1107, "task_loss": 0.23631735146045685 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4181007146835327, "epoch": 0.94, "learning_rate": 4.330998671658013e-05, "loss": 0.3144, "step": 1108, "task_loss": 0.3326930105686188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2478361427783966, "epoch": 0.94, "learning_rate": 4.3303948798454295e-05, "loss": 0.3781, "step": 1109, "task_loss": 0.25559258460998535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3049302399158478, "epoch": 0.94, "learning_rate": 4.329791088032846e-05, "loss": 0.2925, "step": 1110, "task_loss": 0.4718712866306305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2884487509727478, "epoch": 0.94, "learning_rate": 4.3291872962202636e-05, "loss": 0.3083, "step": 1111, "task_loss": 0.44975167512893677 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33776795864105225, "epoch": 0.94, "learning_rate": 4.32858350440768e-05, "loss": 0.3427, "step": 1112, "task_loss": 0.761394739151001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24266141653060913, "epoch": 0.94, "learning_rate": 4.327979712595097e-05, "loss": 0.4165, "step": 1113, "task_loss": 0.5390146970748901 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39270442724227905, "epoch": 0.94, "learning_rate": 4.3273759207825144e-05, "loss": 0.3647, "step": 1114, "task_loss": 0.3376269042491913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25299280881881714, "epoch": 0.94, "learning_rate": 4.326772128969931e-05, "loss": 0.3573, "step": 1115, "task_loss": 0.07655156403779984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21404612064361572, "epoch": 0.94, "learning_rate": 4.3261683371573486e-05, "loss": 0.2581, "step": 1116, "task_loss": 0.619338870048523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2959321439266205, "epoch": 0.94, "learning_rate": 4.325564545344765e-05, "loss": 0.3032, "step": 1117, "task_loss": 0.44233718514442444 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33615487813949585, "epoch": 0.94, "learning_rate": 4.324960753532182e-05, "loss": 0.3433, "step": 1118, "task_loss": 0.2285844087600708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17386695742607117, "epoch": 0.95, "learning_rate": 4.3243569617195994e-05, "loss": 0.2761, "step": 1119, "task_loss": 0.3223957121372223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14410927891731262, "epoch": 0.95, "learning_rate": 4.323753169907016e-05, "loss": 0.2419, "step": 1120, "task_loss": 0.1336435228586197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30185455083847046, "epoch": 0.95, "learning_rate": 4.3231493780944335e-05, "loss": 0.2868, "step": 1121, "task_loss": 1.0119432210922241 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24792376160621643, "epoch": 0.95, "learning_rate": 4.32254558628185e-05, "loss": 0.2882, "step": 1122, "task_loss": 0.2847708463668823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3827042579650879, "epoch": 0.95, "learning_rate": 4.321941794469267e-05, "loss": 0.3252, "step": 1123, "task_loss": 0.3445693850517273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2029864639043808, "epoch": 0.95, "learning_rate": 4.3213380026566843e-05, "loss": 0.2912, "step": 1124, "task_loss": 0.9144219160079956 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4248496890068054, "epoch": 0.95, "learning_rate": 4.320734210844101e-05, "loss": 0.3746, "step": 1125, "task_loss": 0.42825064063072205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2957007884979248, "epoch": 0.95, "learning_rate": 4.3201304190315185e-05, "loss": 0.3867, "step": 1126, "task_loss": 0.8085893988609314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2927684187889099, "epoch": 0.95, "learning_rate": 4.319526627218935e-05, "loss": 0.3136, "step": 1127, "task_loss": 1.5016776323318481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20850490033626556, "epoch": 0.95, "learning_rate": 4.318922835406352e-05, "loss": 0.3148, "step": 1128, "task_loss": 1.0765442848205566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3674388527870178, "epoch": 0.95, "learning_rate": 4.318319043593769e-05, "loss": 0.3104, "step": 1129, "task_loss": 0.9712739586830139 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32994064688682556, "epoch": 0.95, "learning_rate": 4.317715251781186e-05, "loss": 0.3463, "step": 1130, "task_loss": 0.9124463796615601 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20151212811470032, "epoch": 0.96, "learning_rate": 4.317111459968603e-05, "loss": 0.2997, "step": 1131, "task_loss": 0.07413379848003387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34024250507354736, "epoch": 0.96, "learning_rate": 4.31650766815602e-05, "loss": 0.41, "step": 1132, "task_loss": 0.10200577229261398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2040729820728302, "epoch": 0.96, "learning_rate": 4.315903876343437e-05, "loss": 0.3713, "step": 1133, "task_loss": 0.6439746618270874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24761268496513367, "epoch": 0.96, "learning_rate": 4.3153000845308536e-05, "loss": 0.3052, "step": 1134, "task_loss": 0.2621586322784424 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25512057542800903, "epoch": 0.96, "learning_rate": 4.314696292718271e-05, "loss": 0.4092, "step": 1135, "task_loss": 1.3069474697113037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28453660011291504, "epoch": 0.96, "learning_rate": 4.3140925009056883e-05, "loss": 0.3304, "step": 1136, "task_loss": 0.7997164130210876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34711629152297974, "epoch": 0.96, "learning_rate": 4.313488709093105e-05, "loss": 0.3129, "step": 1137, "task_loss": 0.8514912724494934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2861866354942322, "epoch": 0.96, "learning_rate": 4.312884917280522e-05, "loss": 0.2589, "step": 1138, "task_loss": 0.4697415828704834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19450606405735016, "epoch": 0.96, "learning_rate": 4.312281125467939e-05, "loss": 0.2788, "step": 1139, "task_loss": 0.9088435173034668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45894676446914673, "epoch": 0.96, "learning_rate": 4.311677333655356e-05, "loss": 0.3069, "step": 1140, "task_loss": 0.2890830338001251 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.527074933052063, "epoch": 0.96, "learning_rate": 4.3110735418427726e-05, "loss": 0.3773, "step": 1141, "task_loss": 0.8116379976272583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5287387371063232, "epoch": 0.96, "learning_rate": 4.31046975003019e-05, "loss": 0.4034, "step": 1142, "task_loss": 0.8627524375915527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1560383439064026, "epoch": 0.97, "learning_rate": 4.309865958217607e-05, "loss": 0.2749, "step": 1143, "task_loss": 0.3474748432636261 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20934341847896576, "epoch": 0.97, "learning_rate": 4.3092621664050235e-05, "loss": 0.2869, "step": 1144, "task_loss": 0.28019991517066956 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25519466400146484, "epoch": 0.97, "learning_rate": 4.308658374592441e-05, "loss": 0.2733, "step": 1145, "task_loss": 0.2840352952480316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24379102885723114, "epoch": 0.97, "learning_rate": 4.308054582779858e-05, "loss": 0.3583, "step": 1146, "task_loss": 1.8388844728469849 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2644484043121338, "epoch": 0.97, "learning_rate": 4.307450790967274e-05, "loss": 0.2502, "step": 1147, "task_loss": 0.24468854069709778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2651967406272888, "epoch": 0.97, "learning_rate": 4.306846999154692e-05, "loss": 0.3498, "step": 1148, "task_loss": 0.6555808186531067 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.290274053812027, "epoch": 0.97, "learning_rate": 4.306243207342109e-05, "loss": 0.2842, "step": 1149, "task_loss": 0.9025721549987793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5018739700317383, "epoch": 0.97, "learning_rate": 4.305639415529525e-05, "loss": 0.3498, "step": 1150, "task_loss": 0.9152520895004272 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3573232889175415, "epoch": 0.97, "learning_rate": 4.3050356237169425e-05, "loss": 0.2871, "step": 1151, "task_loss": 0.8320004940032959 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3362210988998413, "epoch": 0.97, "learning_rate": 4.30443183190436e-05, "loss": 0.2646, "step": 1152, "task_loss": 0.3134423792362213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2594633400440216, "epoch": 0.97, "learning_rate": 4.3038280400917766e-05, "loss": 0.3415, "step": 1153, "task_loss": 0.2707746922969818 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.11032459884881973, "epoch": 0.97, "learning_rate": 4.3032242482791933e-05, "loss": 0.3128, "step": 1154, "task_loss": 0.6009519696235657 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42776772379875183, "epoch": 0.98, "learning_rate": 4.302620456466611e-05, "loss": 0.3611, "step": 1155, "task_loss": 1.070064902305603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21777436137199402, "epoch": 0.98, "learning_rate": 4.3020166646540275e-05, "loss": 0.308, "step": 1156, "task_loss": 0.24087083339691162 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14768707752227783, "epoch": 0.98, "learning_rate": 4.301412872841444e-05, "loss": 0.2804, "step": 1157, "task_loss": 0.48272258043289185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32201606035232544, "epoch": 0.98, "learning_rate": 4.3008090810288616e-05, "loss": 0.2526, "step": 1158, "task_loss": 0.22566260397434235 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1533641219139099, "epoch": 0.98, "learning_rate": 4.300205289216279e-05, "loss": 0.3758, "step": 1159, "task_loss": 0.4144149124622345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2917732000350952, "epoch": 0.98, "learning_rate": 4.299601497403695e-05, "loss": 0.2851, "step": 1160, "task_loss": 0.8044385313987732 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30744171142578125, "epoch": 0.98, "learning_rate": 4.2989977055911124e-05, "loss": 0.3513, "step": 1161, "task_loss": 0.4882700443267822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2208593338727951, "epoch": 0.98, "learning_rate": 4.29839391377853e-05, "loss": 0.3355, "step": 1162, "task_loss": 0.5675768852233887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4546198844909668, "epoch": 0.98, "learning_rate": 4.297790121965946e-05, "loss": 0.3864, "step": 1163, "task_loss": 1.3814033269882202 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3526228070259094, "epoch": 0.98, "learning_rate": 4.297186330153363e-05, "loss": 0.282, "step": 1164, "task_loss": 0.990058422088623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.402987539768219, "epoch": 0.98, "learning_rate": 4.2965825383407806e-05, "loss": 0.2678, "step": 1165, "task_loss": 0.3445275127887726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4974266290664673, "epoch": 0.99, "learning_rate": 4.2959787465281974e-05, "loss": 0.3386, "step": 1166, "task_loss": 0.6241133213043213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2324366271495819, "epoch": 0.99, "learning_rate": 4.295374954715614e-05, "loss": 0.2212, "step": 1167, "task_loss": 0.2512488067150116 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2108522355556488, "epoch": 0.99, "learning_rate": 4.2947711629030315e-05, "loss": 0.263, "step": 1168, "task_loss": 0.9792559146881104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4749847650527954, "epoch": 0.99, "learning_rate": 4.294167371090448e-05, "loss": 0.3664, "step": 1169, "task_loss": 0.7645936608314514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22060546278953552, "epoch": 0.99, "learning_rate": 4.293563579277865e-05, "loss": 0.2002, "step": 1170, "task_loss": 0.2009570151567459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2716113328933716, "epoch": 0.99, "learning_rate": 4.292959787465282e-05, "loss": 0.2465, "step": 1171, "task_loss": 0.10613411664962769 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.420468270778656, "epoch": 0.99, "learning_rate": 4.292355995652699e-05, "loss": 0.3057, "step": 1172, "task_loss": 0.9572458863258362 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18062123656272888, "epoch": 0.99, "learning_rate": 4.291752203840116e-05, "loss": 0.2856, "step": 1173, "task_loss": 0.28005337715148926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20923954248428345, "epoch": 0.99, "learning_rate": 4.291148412027533e-05, "loss": 0.2916, "step": 1174, "task_loss": 0.3251589238643646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23297256231307983, "epoch": 0.99, "learning_rate": 4.29054462021495e-05, "loss": 0.276, "step": 1175, "task_loss": 1.2257041931152344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7452914118766785, "epoch": 0.99, "learning_rate": 4.289940828402367e-05, "loss": 0.4171, "step": 1176, "task_loss": 0.6722822189331055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.13628363609313965, "epoch": 0.99, "learning_rate": 4.289337036589784e-05, "loss": 0.2287, "step": 1177, "task_loss": 0.3862496614456177 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21069841086864471, "epoch": 1.0, "learning_rate": 4.2887332447772014e-05, "loss": 0.2761, "step": 1178, "task_loss": 0.5023462176322937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3206387758255005, "epoch": 1.0, "learning_rate": 4.288129452964618e-05, "loss": 0.2972, "step": 1179, "task_loss": 1.333728551864624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17135363817214966, "epoch": 1.0, "learning_rate": 4.287525661152035e-05, "loss": 0.2476, "step": 1180, "task_loss": 0.3198869824409485 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2221292406320572, "epoch": 1.0, "learning_rate": 4.286921869339452e-05, "loss": 0.2624, "step": 1181, "task_loss": 1.1573820114135742 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23098769783973694, "epoch": 1.0, "learning_rate": 4.286318077526869e-05, "loss": 0.3488, "step": 1182, "task_loss": 0.16446726024150848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18893688917160034, "epoch": 1.0, "learning_rate": 4.2857142857142856e-05, "loss": 0.3232, "step": 1183, "task_loss": 0.3497985005378723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -0.004563917405903339, "compression/movement_sparsity/linear_layer_sparsity": 0.0010001276362903743, "compression/movement_sparsity/model_sparsity": 0.0009657701752313146, "compression_loss": 0.0, "distillation_loss": 0.15048199892044067, "epoch": 1.0, "learning_rate": 4.285110493901703e-05, "loss": 0.5613, "step": 1184, "task_loss": 0.7848635911941528 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0037974665492690463, "compression/movement_sparsity/importance_threshold": -0.004552363190114976, "compression/movement_sparsity/linear_layer_sparsity": 0.001039835114517798, "compression/movement_sparsity/model_sparsity": 0.0010041135794271354, "compression_loss": 0.41029655933380127, "distillation_loss": 0.37748983502388, "epoch": 1.0, "learning_rate": 4.28450670208912e-05, "loss": 0.8251, "step": 1185, "task_loss": 0.9553921818733215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.007588518460622751, "compression/movement_sparsity/importance_threshold": -0.004540828491578369, "compression/movement_sparsity/linear_layer_sparsity": 0.0010566362667167291, "compression/movement_sparsity/model_sparsity": 0.0010203375603616434, "compression_loss": 0.8199001550674438, "distillation_loss": 0.25276631116867065, "epoch": 1.0, "learning_rate": 4.283902910276537e-05, "loss": 1.1295, "step": 1186, "task_loss": 0.49604928493499756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.011373161156410516, "compression/movement_sparsity/importance_threshold": -0.004529313293795415, "compression/movement_sparsity/linear_layer_sparsity": 0.0010719111254582697, "compression/movement_sparsity/model_sparsity": 0.0010350876807144502, "compression_loss": 1.2288110256195068, "distillation_loss": 0.3114812970161438, "epoch": 1.0, "learning_rate": 4.283299118463954e-05, "loss": 1.5215, "step": 1187, "task_loss": 0.9576715230941772 }, { "compression/movement_sparsity/importance_regularization_factor": 0.015151400058979747, "compression/movement_sparsity/importance_threshold": -0.004517817580268018, "compression/movement_sparsity/linear_layer_sparsity": 0.0010856477665747838, "compression/movement_sparsity/model_sparsity": 0.0010483524259497611, "compression_loss": 1.6370296478271484, "distillation_loss": 0.5240944027900696, "epoch": 1.0, "learning_rate": 4.2826953266513706e-05, "loss": 2.0533, "step": 1188, "task_loss": 0.758603036403656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.018923240590679014, "compression/movement_sparsity/importance_threshold": -0.004506341334498075, "compression/movement_sparsity/linear_layer_sparsity": 0.0011207525160947646, "compression/movement_sparsity/model_sparsity": 0.0010822512193288893, "compression_loss": 2.0445544719696045, "distillation_loss": 0.26520106196403503, "epoch": 1.01, "learning_rate": 4.282091534838788e-05, "loss": 2.3402, "step": 1189, "task_loss": 0.23267896473407745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.022688688173855553, "compression/movement_sparsity/importance_threshold": -0.004494884539987491, "compression/movement_sparsity/linear_layer_sparsity": 0.0011543428963249906, "compression/movement_sparsity/model_sparsity": 0.0011146876666621108, "compression_loss": 2.451387882232666, "distillation_loss": 0.35634660720825195, "epoch": 1.01, "learning_rate": 4.281487743026205e-05, "loss": 2.8116, "step": 1190, "task_loss": 0.2072899341583252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.026447748230858936, "compression/movement_sparsity/importance_threshold": -0.004483447180238162, "compression/movement_sparsity/linear_layer_sparsity": 0.0011436588421232573, "compression/movement_sparsity/model_sparsity": 0.001104370642590202, "compression_loss": 2.857529401779175, "distillation_loss": 0.3354710042476654, "epoch": 1.01, "learning_rate": 4.2808839512136214e-05, "loss": 3.1024, "step": 1191, "task_loss": 0.565451979637146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.030200426184036733, "compression/movement_sparsity/importance_threshold": -0.00447202923875199, "compression/movement_sparsity/linear_layer_sparsity": 0.001161974363611943, "compression/movement_sparsity/model_sparsity": 0.001122056969570617, "compression_loss": 3.2629759311676025, "distillation_loss": 0.33731716871261597, "epoch": 1.01, "learning_rate": 4.280280159401039e-05, "loss": 3.6671, "step": 1192, "task_loss": 0.5615651607513428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.033946727455736514, "compression/movement_sparsity/importance_threshold": -0.004460630699030878, "compression/movement_sparsity/linear_layer_sparsity": 0.0011680795374415047, "compression/movement_sparsity/model_sparsity": 0.0011279524118974218, "compression_loss": 3.6677308082580566, "distillation_loss": 0.3487268090248108, "epoch": 1.01, "learning_rate": 4.2796763675884555e-05, "loss": 4.0313, "step": 1193, "task_loss": 0.7415367960929871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03768665746830768, "compression/movement_sparsity/importance_threshold": -0.004449251544576722, "compression/movement_sparsity/linear_layer_sparsity": 0.0011925121569273881, "compression/movement_sparsity/model_sparsity": 0.0011515456957404359, "compression_loss": 4.071794509887695, "distillation_loss": 0.3797004520893097, "epoch": 1.01, "learning_rate": 4.279072575775873e-05, "loss": 4.4957, "step": 1194, "task_loss": 1.3221724033355713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04142022164409781, "compression/movement_sparsity/importance_threshold": -0.004437891758891425, "compression/movement_sparsity/linear_layer_sparsity": 0.0012308722042119595, "compression/movement_sparsity/model_sparsity": 0.0011885879573914737, "compression_loss": 4.475164413452148, "distillation_loss": 0.21213014423847198, "epoch": 1.01, "learning_rate": 4.2784687839632896e-05, "loss": 4.8729, "step": 1195, "task_loss": 0.08939064294099808 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04514742540545447, "compression/movement_sparsity/importance_threshold": -0.004426551325476888, "compression/movement_sparsity/linear_layer_sparsity": 0.0012646533711243592, "compression/movement_sparsity/model_sparsity": 0.0012212086372974077, "compression_loss": 4.877840995788574, "distillation_loss": 0.18829788267612457, "epoch": 1.01, "learning_rate": 4.277864992150707e-05, "loss": 5.3977, "step": 1196, "task_loss": 0.369351327419281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04886827417472722, "compression/movement_sparsity/importance_threshold": -0.0044152302278350096, "compression/movement_sparsity/linear_layer_sparsity": 0.0012770783538009285, "compression/movement_sparsity/model_sparsity": 0.0012332067835953192, "compression_loss": 5.279824733734131, "distillation_loss": 0.25111642479896545, "epoch": 1.01, "learning_rate": 4.277261200338124e-05, "loss": 5.5838, "step": 1197, "task_loss": 0.5028445720672607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05258277337426348, "compression/movement_sparsity/importance_threshold": -0.004403928449467692, "compression/movement_sparsity/linear_layer_sparsity": 0.0012772691404831025, "compression/movement_sparsity/model_sparsity": 0.0012333910161680319, "compression_loss": 5.681119918823242, "distillation_loss": 0.16469311714172363, "epoch": 1.01, "learning_rate": 4.2766574085255405e-05, "loss": 5.9799, "step": 1198, "task_loss": 0.6138965487480164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05629092842641148, "compression/movement_sparsity/importance_threshold": -0.004392645973876834, "compression/movement_sparsity/linear_layer_sparsity": 0.001306638365370233, "compression/movement_sparsity/model_sparsity": 0.0012617513178299858, "compression_loss": 6.081721305847168, "distillation_loss": 0.23099678754806519, "epoch": 1.01, "learning_rate": 4.276053616712958e-05, "loss": 6.3855, "step": 1199, "task_loss": 0.977968692779541 }, { "compression/movement_sparsity/importance_regularization_factor": 0.059992744753518956, "compression/movement_sparsity/importance_threshold": -0.0043813827845643385, "compression/movement_sparsity/linear_layer_sparsity": 0.0013320368424346212, "compression/movement_sparsity/model_sparsity": 0.0012862772790723576, "compression_loss": 6.481628894805908, "distillation_loss": 0.35345980525016785, "epoch": 1.01, "learning_rate": 4.2754498249003746e-05, "loss": 6.9538, "step": 1200, "task_loss": 0.5519553422927856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06368822777793498, "compression/movement_sparsity/importance_threshold": -0.004370138865032103, "compression/movement_sparsity/linear_layer_sparsity": 0.0013814505931176375, "compression/movement_sparsity/model_sparsity": 0.0013339935154049348, "compression_loss": 6.880846977233887, "distillation_loss": 0.18901535868644714, "epoch": 1.02, "learning_rate": 4.274846033087791e-05, "loss": 7.1905, "step": 1201, "task_loss": 0.2078811377286911 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06737738292200746, "compression/movement_sparsity/importance_threshold": -0.00435891419878203, "compression/movement_sparsity/linear_layer_sparsity": 0.001485620121584537, "compression/movement_sparsity/model_sparsity": 0.0014345845001060432, "compression_loss": 7.279369354248047, "distillation_loss": 0.18496623635292053, "epoch": 1.02, "learning_rate": 4.274242241275209e-05, "loss": 7.6314, "step": 1202, "task_loss": 0.4454924762248993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07106021560808395, "compression/movement_sparsity/importance_threshold": -0.00434770876931602, "compression/movement_sparsity/linear_layer_sparsity": 0.001539231179275377, "compression/movement_sparsity/model_sparsity": 0.0014863538530382986, "compression_loss": 7.677201747894287, "distillation_loss": 0.40692412853240967, "epoch": 1.02, "learning_rate": 4.2736384494626254e-05, "loss": 8.0471, "step": 1203, "task_loss": 0.9005877375602722 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07473673125851371, "compression/movement_sparsity/importance_threshold": -0.004336522560135972, "compression/movement_sparsity/linear_layer_sparsity": 0.0015924725877695053, "compression/movement_sparsity/model_sparsity": 0.0015377662553609233, "compression_loss": 8.074338912963867, "distillation_loss": 0.3484841585159302, "epoch": 1.02, "learning_rate": 4.273034657650042e-05, "loss": 8.5193, "step": 1204, "task_loss": 1.199270248413086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07840693529564446, "compression/movement_sparsity/importance_threshold": -0.004325355554743787, "compression/movement_sparsity/linear_layer_sparsity": 0.0016590690640158003, "compression/movement_sparsity/model_sparsity": 0.0016020749377734335, "compression_loss": 8.470785140991211, "distillation_loss": 0.2438778281211853, "epoch": 1.02, "learning_rate": 4.2724308658374595e-05, "loss": 8.7582, "step": 1205, "task_loss": 0.17912335693836212 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08207083314182378, "compression/movement_sparsity/importance_threshold": -0.004314207736641367, "compression/movement_sparsity/linear_layer_sparsity": 0.001721670944104081, "compression/movement_sparsity/model_sparsity": 0.0016625262506947728, "compression_loss": 8.866531372070312, "distillation_loss": 0.29903608560562134, "epoch": 1.02, "learning_rate": 4.271827074024877e-05, "loss": 9.2483, "step": 1206, "task_loss": 0.48423251509666443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08572843021940091, "compression/movement_sparsity/importance_threshold": -0.00430307908933061, "compression/movement_sparsity/linear_layer_sparsity": 0.0018340562240720898, "compression/movement_sparsity/model_sparsity": 0.0017710507505583196, "compression_loss": 9.261591911315918, "distillation_loss": 0.270751953125, "epoch": 1.02, "learning_rate": 4.271223282212293e-05, "loss": 9.5269, "step": 1207, "task_loss": 0.39931365847587585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08937973195072357, "compression/movement_sparsity/importance_threshold": -0.0042919695963134176, "compression/movement_sparsity/linear_layer_sparsity": 0.002024079759517203, "compression/movement_sparsity/model_sparsity": 0.0019545463929801217, "compression_loss": 9.65595817565918, "distillation_loss": 0.33985987305641174, "epoch": 1.02, "learning_rate": 4.2706194903997104e-05, "loss": 10.0822, "step": 1208, "task_loss": 1.621153473854065 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09302474375814002, "compression/movement_sparsity/importance_threshold": -0.0042808792410916905, "compression/movement_sparsity/linear_layer_sparsity": 0.0020845591377663003, "compression/movement_sparsity/model_sparsity": 0.0020129481185300327, "compression_loss": 10.049636840820312, "distillation_loss": 0.22262011468410492, "epoch": 1.02, "learning_rate": 4.270015698587128e-05, "loss": 10.3218, "step": 1209, "task_loss": 0.8181881308555603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09666347106399781, "compression/movement_sparsity/importance_threshold": -0.004269808007167331, "compression/movement_sparsity/linear_layer_sparsity": 0.0021946311292129514, "compression/movement_sparsity/model_sparsity": 0.002119238798449439, "compression_loss": 10.442619323730469, "distillation_loss": 0.41165685653686523, "epoch": 1.02, "learning_rate": 4.2694119067745445e-05, "loss": 10.8912, "step": 1210, "task_loss": 0.9035671353340149 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10029591929064618, "compression/movement_sparsity/importance_threshold": -0.004258755878042235, "compression/movement_sparsity/linear_layer_sparsity": 0.002374352183820679, "compression/movement_sparsity/model_sparsity": 0.0022927858819447576, "compression_loss": 10.8349027633667, "distillation_loss": 0.32800403237342834, "epoch": 1.02, "learning_rate": 4.268808114961961e-05, "loss": 11.2702, "step": 1211, "task_loss": 0.9444795250892639 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10392209386043288, "compression/movement_sparsity/importance_threshold": -0.004247722837218306, "compression/movement_sparsity/linear_layer_sparsity": 0.002492079490889554, "compression/movement_sparsity/model_sparsity": 0.0024064688938442587, "compression_loss": 11.226493835449219, "distillation_loss": 0.4187358617782593, "epoch": 1.02, "learning_rate": 4.2682043231493786e-05, "loss": 11.5779, "step": 1212, "task_loss": 0.7167431712150574 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10754200019570564, "compression/movement_sparsity/importance_threshold": -0.004236708868197445, "compression/movement_sparsity/linear_layer_sparsity": 0.002679622799466408, "compression/movement_sparsity/model_sparsity": 0.0025875695128207967, "compression_loss": 11.617390632629395, "distillation_loss": 0.19560036063194275, "epoch": 1.03, "learning_rate": 4.267600531336795e-05, "loss": 11.8148, "step": 1213, "task_loss": 0.23003944754600525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11115564371881353, "compression/movement_sparsity/importance_threshold": -0.00422571395448155, "compression/movement_sparsity/linear_layer_sparsity": 0.002910283898214542, "compression/movement_sparsity/model_sparsity": 0.0028103066932303935, "compression_loss": 12.007583618164062, "distillation_loss": 0.31421175599098206, "epoch": 1.03, "learning_rate": 4.266996739524212e-05, "loss": 12.3871, "step": 1214, "task_loss": 0.4106763005256653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11476302985210429, "compression/movement_sparsity/importance_threshold": -0.004214738079572524, "compression/movement_sparsity/linear_layer_sparsity": 0.003149733108510307, "compression/movement_sparsity/model_sparsity": 0.0030415300865205674, "compression_loss": 12.397085189819336, "distillation_loss": 0.18430189788341522, "epoch": 1.03, "learning_rate": 4.2663929477116294e-05, "loss": 12.6978, "step": 1215, "task_loss": 0.9206427335739136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11836416401792565, "compression/movement_sparsity/importance_threshold": -0.004203781226972267, "compression/movement_sparsity/linear_layer_sparsity": 0.0034414697938893314, "compression/movement_sparsity/model_sparsity": 0.0033232447192698025, "compression_loss": 12.785908699035645, "distillation_loss": 0.21479223668575287, "epoch": 1.03, "learning_rate": 4.265789155899047e-05, "loss": 13.1885, "step": 1216, "task_loss": 0.3621373474597931 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12195905163862686, "compression/movement_sparsity/importance_threshold": -0.004192843380182677, "compression/movement_sparsity/linear_layer_sparsity": 0.0037257657986635774, "compression/movement_sparsity/model_sparsity": 0.003597774281683244, "compression_loss": 13.174031257629395, "distillation_loss": 0.30214691162109375, "epoch": 1.03, "learning_rate": 4.265185364086463e-05, "loss": 13.4975, "step": 1217, "task_loss": 0.43067657947540283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1255476981365553, "compression/movement_sparsity/importance_threshold": -0.0041819245227056575, "compression/movement_sparsity/linear_layer_sparsity": 0.004055433261292282, "compression/movement_sparsity/model_sparsity": 0.0039161166527949135, "compression_loss": 13.561464309692383, "distillation_loss": 0.20788107812404633, "epoch": 1.03, "learning_rate": 4.26458157227388e-05, "loss": 13.8864, "step": 1218, "task_loss": 0.7008878588676453 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12913010893405957, "compression/movement_sparsity/importance_threshold": -0.004171024638043107, "compression/movement_sparsity/linear_layer_sparsity": 0.004409354480892332, "compression/movement_sparsity/model_sparsity": 0.004257879589712679, "compression_loss": 13.948198318481445, "distillation_loss": 0.25103408098220825, "epoch": 1.03, "learning_rate": 4.2639777804612977e-05, "loss": 14.2062, "step": 1219, "task_loss": 0.31897789239883423 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1327062894534874, "compression/movement_sparsity/importance_threshold": -0.004160143709696928, "compression/movement_sparsity/linear_layer_sparsity": 0.004785025382260197, "compression/movement_sparsity/model_sparsity": 0.004620645039919687, "compression_loss": 14.334238052368164, "distillation_loss": 0.14130473136901855, "epoch": 1.03, "learning_rate": 4.263373988648714e-05, "loss": 14.6271, "step": 1220, "task_loss": 0.49916383624076843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1362762451171875, "compression/movement_sparsity/importance_threshold": -0.004149281721169018, "compression/movement_sparsity/linear_layer_sparsity": 0.005210288896825615, "compression/movement_sparsity/model_sparsity": 0.005031299444496189, "compression_loss": 14.719594955444336, "distillation_loss": 0.38804569840431213, "epoch": 1.03, "learning_rate": 4.262770196836131e-05, "loss": 15.0129, "step": 1221, "task_loss": 1.1559282541275024 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13983998134750797, "compression/movement_sparsity/importance_threshold": -0.00413843865596128, "compression/movement_sparsity/linear_layer_sparsity": 0.0055203172553580536, "compression/movement_sparsity/model_sparsity": 0.00533067737515425, "compression_loss": 15.104253768920898, "distillation_loss": 0.22257769107818604, "epoch": 1.03, "learning_rate": 4.2621664050235485e-05, "loss": 15.4226, "step": 1222, "task_loss": 0.5349777936935425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14339750356679637, "compression/movement_sparsity/importance_threshold": -0.004127614497575614, "compression/movement_sparsity/linear_layer_sparsity": 0.005974425331434624, "compression/movement_sparsity/model_sparsity": 0.005769185441817747, "compression_loss": 15.488214492797852, "distillation_loss": 0.27086007595062256, "epoch": 1.03, "learning_rate": 4.2615626132109645e-05, "loss": 15.8748, "step": 1223, "task_loss": 0.06427006423473358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14694881719740194, "compression/movement_sparsity/importance_threshold": -0.004116809229513919, "compression/movement_sparsity/linear_layer_sparsity": 0.006461134081827645, "compression/movement_sparsity/model_sparsity": 0.006239174249343519, "compression_loss": 15.871499061584473, "distillation_loss": 0.38948312401771545, "epoch": 1.03, "learning_rate": 4.260958821398382e-05, "loss": 16.2235, "step": 1224, "task_loss": 0.883831799030304 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15049392766167224, "compression/movement_sparsity/importance_threshold": -0.004106022835278097, "compression/movement_sparsity/linear_layer_sparsity": 0.007034257275077765, "compression/movement_sparsity/model_sparsity": 0.006792608897772327, "compression_loss": 16.254091262817383, "distillation_loss": 0.3500271439552307, "epoch": 1.04, "learning_rate": 4.260355029585799e-05, "loss": 16.6056, "step": 1225, "task_loss": 0.7578271627426147 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1540328403819557, "compression/movement_sparsity/importance_threshold": -0.004095255298370047, "compression/movement_sparsity/linear_layer_sparsity": 0.007625135553937685, "compression/movement_sparsity/model_sparsity": 0.007363188689999208, "compression_loss": 16.636003494262695, "distillation_loss": 0.2130354642868042, "epoch": 1.04, "learning_rate": 4.259751237773216e-05, "loss": 16.9742, "step": 1226, "task_loss": 0.3529678285121918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15756556078059986, "compression/movement_sparsity/importance_threshold": -0.004084506602291672, "compression/movement_sparsity/linear_layer_sparsity": 0.008295381092581908, "compression/movement_sparsity/model_sparsity": 0.008010409232474551, "compression_loss": 17.017234802246094, "distillation_loss": 0.19248083233833313, "epoch": 1.04, "learning_rate": 4.259147445960633e-05, "loss": 17.3515, "step": 1227, "task_loss": 0.5984662771224976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1610920942799538, "compression/movement_sparsity/importance_threshold": -0.0040737767305448706, "compression/movement_sparsity/linear_layer_sparsity": 0.009028383525493679, "compression/movement_sparsity/model_sparsity": 0.008718230776836563, "compression_loss": 17.397768020629883, "distillation_loss": 0.37513667345046997, "epoch": 1.04, "learning_rate": 4.25854365414805e-05, "loss": 17.7558, "step": 1228, "task_loss": 1.1217031478881836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16461244630236543, "compression/movement_sparsity/importance_threshold": -0.004063065666631543, "compression/movement_sparsity/linear_layer_sparsity": 0.009724933777942618, "compression/movement_sparsity/model_sparsity": 0.009390852385274663, "compression_loss": 17.777629852294922, "distillation_loss": 0.2674877643585205, "epoch": 1.04, "learning_rate": 4.257939862335467e-05, "loss": 18.0458, "step": 1229, "task_loss": 0.45754510164260864 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1681266222701825, "compression/movement_sparsity/importance_threshold": -0.004052373394053592, "compression/movement_sparsity/linear_layer_sparsity": 0.010412719767179196, "compression/movement_sparsity/model_sparsity": 0.010055010809903777, "compression_loss": 18.15679359436035, "distillation_loss": 0.31084680557250977, "epoch": 1.04, "learning_rate": 4.2573360705228836e-05, "loss": 18.5965, "step": 1230, "task_loss": 0.2089879810810089 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17163462760575404, "compression/movement_sparsity/importance_threshold": -0.004041699896312914, "compression/movement_sparsity/linear_layer_sparsity": 0.011026683234582146, "compression/movement_sparsity/model_sparsity": 0.010647882743428887, "compression_loss": 18.53529167175293, "distillation_loss": 0.48372378945350647, "epoch": 1.04, "learning_rate": 4.256732278710301e-05, "loss": 18.9975, "step": 1231, "task_loss": 0.28310129046440125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17513646773142766, "compression/movement_sparsity/importance_threshold": -0.004031045156911412, "compression/movement_sparsity/linear_layer_sparsity": 0.01184038843405344, "compression/movement_sparsity/model_sparsity": 0.011433634666048352, "compression_loss": 18.91310691833496, "distillation_loss": 0.625645101070404, "epoch": 1.04, "learning_rate": 4.256128486897718e-05, "loss": 19.3991, "step": 1232, "task_loss": 1.3038017749786377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17863214806955124, "compression/movement_sparsity/importance_threshold": -0.004020409159350988, "compression/movement_sparsity/linear_layer_sparsity": 0.012480310814399664, "compression/movement_sparsity/model_sparsity": 0.012051573743998178, "compression_loss": 19.290283203125, "distillation_loss": 0.398506224155426, "epoch": 1.04, "learning_rate": 4.2555246950851344e-05, "loss": 19.8206, "step": 1233, "task_loss": 0.9363745450973511 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18212167404247404, "compression/movement_sparsity/importance_threshold": -0.004009791887133538, "compression/movement_sparsity/linear_layer_sparsity": 0.013246140480812964, "compression/movement_sparsity/model_sparsity": 0.01279109480540256, "compression_loss": 19.66680145263672, "distillation_loss": 0.7033413648605347, "epoch": 1.04, "learning_rate": 4.254920903272552e-05, "loss": 20.199, "step": 1234, "task_loss": 0.18566125631332397 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18560505107254344, "compression/movement_sparsity/importance_threshold": -0.003999193323760967, "compression/movement_sparsity/linear_layer_sparsity": 0.013944837083436358, "compression/movement_sparsity/model_sparsity": 0.013465789030283679, "compression_loss": 20.042619705200195, "distillation_loss": 0.6369860172271729, "epoch": 1.04, "learning_rate": 4.254317111459969e-05, "loss": 20.5969, "step": 1235, "task_loss": 0.8035876750946045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18908228458210785, "compression/movement_sparsity/importance_threshold": -0.0039886134527351725, "compression/movement_sparsity/linear_layer_sparsity": 0.014540878526882607, "compression/movement_sparsity/model_sparsity": 0.014041354616509594, "compression_loss": 20.417795181274414, "distillation_loss": 0.680345892906189, "epoch": 1.04, "learning_rate": 4.253713319647385e-05, "loss": 20.9379, "step": 1236, "task_loss": 0.9641849398612976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19255337999351502, "compression/movement_sparsity/importance_threshold": -0.0039780522575580575, "compression/movement_sparsity/linear_layer_sparsity": 0.015364123060462588, "compression/movement_sparsity/model_sparsity": 0.01483631816776469, "compression_loss": 20.79230499267578, "distillation_loss": 0.6249008774757385, "epoch": 1.05, "learning_rate": 4.2531095278348026e-05, "loss": 21.3149, "step": 1237, "task_loss": 0.7482644319534302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.196018342729114, "compression/movement_sparsity/importance_threshold": -0.003967509721731519, "compression/movement_sparsity/linear_layer_sparsity": 0.016132814527108498, "compression/movement_sparsity/model_sparsity": 0.015578602717759764, "compression_loss": 21.166133880615234, "distillation_loss": 0.3550962805747986, "epoch": 1.05, "learning_rate": 4.25250573602222e-05, "loss": 21.7123, "step": 1238, "task_loss": 0.7569570541381836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19947717821125255, "compression/movement_sparsity/importance_threshold": -0.003956985828757461, "compression/movement_sparsity/linear_layer_sparsity": 0.017022476674420237, "compression/movement_sparsity/model_sparsity": 0.01643770223339045, "compression_loss": 21.539316177368164, "distillation_loss": 0.19553369283676147, "epoch": 1.05, "learning_rate": 4.251901944209637e-05, "loss": 22.049, "step": 1239, "task_loss": 0.10882483422756195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20292989186227856, "compression/movement_sparsity/importance_threshold": -0.003946480562137782, "compression/movement_sparsity/linear_layer_sparsity": 0.01774728720416617, "compression/movement_sparsity/model_sparsity": 0.017137613291661614, "compression_loss": 21.91181755065918, "distillation_loss": 0.40565478801727295, "epoch": 1.05, "learning_rate": 4.2512981523970535e-05, "loss": 22.3305, "step": 1240, "task_loss": 0.5878121256828308 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20637648910454076, "compression/movement_sparsity/importance_threshold": -0.003935993905374383, "compression/movement_sparsity/linear_layer_sparsity": 0.018443837456615108, "compression/movement_sparsity/model_sparsity": 0.017810234900099715, "compression_loss": 22.28365707397461, "distillation_loss": 0.33983078598976135, "epoch": 1.05, "learning_rate": 4.250694360584471e-05, "loss": 22.7013, "step": 1241, "task_loss": 0.16688188910484314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20981697536038724, "compression/movement_sparsity/importance_threshold": -0.003925525841969164, "compression/movement_sparsity/linear_layer_sparsity": 0.01903546695803609, "compression/movement_sparsity/model_sparsity": 0.01838154010808165, "compression_loss": 22.654813766479492, "distillation_loss": 0.5087032914161682, "epoch": 1.05, "learning_rate": 4.2500905687718876e-05, "loss": 23.1244, "step": 1242, "task_loss": 0.8088805079460144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21325135605216555, "compression/movement_sparsity/importance_threshold": -0.003915076355424027, "compression/movement_sparsity/linear_layer_sparsity": 0.019716790048414028, "compression/movement_sparsity/model_sparsity": 0.019039457654310123, "compression_loss": 23.025293350219727, "distillation_loss": 0.7074320912361145, "epoch": 1.05, "learning_rate": 4.249486776959304e-05, "loss": 23.543, "step": 1243, "task_loss": 1.3079643249511719 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21667963660222495, "compression/movement_sparsity/importance_threshold": -0.0039046454292408694, "compression/movement_sparsity/linear_layer_sparsity": 0.020562928983854867, "compression/movement_sparsity/model_sparsity": 0.019856529114290738, "compression_loss": 23.395111083984375, "distillation_loss": 0.35303163528442383, "epoch": 1.05, "learning_rate": 4.248882985146722e-05, "loss": 23.8201, "step": 1244, "task_loss": 0.8302319645881653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22010182243291299, "compression/movement_sparsity/importance_threshold": -0.003894233046921594, "compression/movement_sparsity/linear_layer_sparsity": 0.021390001175245962, "compression/movement_sparsity/model_sparsity": 0.020655188831535883, "compression_loss": 23.764244079589844, "distillation_loss": 0.5794785618782043, "epoch": 1.05, "learning_rate": 4.2482791933341384e-05, "loss": 24.3654, "step": 1245, "task_loss": 0.6306720972061157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2235179189665779, "compression/movement_sparsity/importance_threshold": -0.003883839191968101, "compression/movement_sparsity/linear_layer_sparsity": 0.022431517597400415, "compression/movement_sparsity/model_sparsity": 0.021660925960510048, "compression_loss": 24.132734298706055, "distillation_loss": 0.35249751806259155, "epoch": 1.05, "learning_rate": 4.247675401521555e-05, "loss": 24.6967, "step": 1246, "task_loss": 0.5752303004264832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22692793162556762, "compression/movement_sparsity/importance_threshold": -0.003873463847882292, "compression/movement_sparsity/linear_layer_sparsity": 0.02340377845392578, "compression/movement_sparsity/model_sparsity": 0.02259978666558952, "compression_loss": 24.500560760498047, "distillation_loss": 0.32665061950683594, "epoch": 1.05, "learning_rate": 4.2470716097089725e-05, "loss": 25.0279, "step": 1247, "task_loss": 0.7009742259979248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23033186583223103, "compression/movement_sparsity/importance_threshold": -0.003863106998166065, "compression/movement_sparsity/linear_layer_sparsity": 0.024242309770414937, "compression/movement_sparsity/model_sparsity": 0.02340951185173322, "compression_loss": 24.867727279663086, "distillation_loss": 0.6337481737136841, "epoch": 1.05, "learning_rate": 4.246467817896389e-05, "loss": 25.442, "step": 1248, "task_loss": 0.6848210692405701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23372972700891603, "compression/movement_sparsity/importance_threshold": -0.0038527686263213207, "compression/movement_sparsity/linear_layer_sparsity": 0.02507800313500676, "compression/movement_sparsity/model_sparsity": 0.024216496578357814, "compression_loss": 25.234224319458008, "distillation_loss": 0.8047425150871277, "epoch": 1.06, "learning_rate": 4.2458640260838067e-05, "loss": 25.6424, "step": 1249, "task_loss": 0.4596101641654968 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23712152057797037, "compression/movement_sparsity/importance_threshold": -0.0038424487158499623, "compression/movement_sparsity/linear_layer_sparsity": 0.02585722364167514, "compression/movement_sparsity/model_sparsity": 0.024968948463459466, "compression_loss": 25.60004997253418, "distillation_loss": 0.5374100208282471, "epoch": 1.06, "learning_rate": 4.2452602342712234e-05, "loss": 26.2348, "step": 1250, "task_loss": 0.6199985146522522 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24050725196174294, "compression/movement_sparsity/importance_threshold": -0.0038321472502538868, "compression/movement_sparsity/linear_layer_sparsity": 0.026712901911224666, "compression/movement_sparsity/model_sparsity": 0.025795231552075713, "compression_loss": 25.96520233154297, "distillation_loss": 0.7549915313720703, "epoch": 1.06, "learning_rate": 4.244656442458641e-05, "loss": 26.4838, "step": 1251, "task_loss": 0.9914858341217041 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24388692658258165, "compression/movement_sparsity/importance_threshold": -0.0038218642130349967, "compression/movement_sparsity/linear_layer_sparsity": 0.027609229668244854, "compression/movement_sparsity/model_sparsity": 0.026660767693215553, "compression_loss": 26.329713821411133, "distillation_loss": 0.3892171084880829, "epoch": 1.06, "learning_rate": 4.2440526506460575e-05, "loss": 26.8058, "step": 1252, "task_loss": 1.5135600566864014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24726054986283408, "compression/movement_sparsity/importance_threshold": -0.0038115995876951934, "compression/movement_sparsity/linear_layer_sparsity": 0.02852368216007155, "compression/movement_sparsity/model_sparsity": 0.027543805928763092, "compression_loss": 26.693553924560547, "distillation_loss": 0.5658009052276611, "epoch": 1.06, "learning_rate": 4.243448858833474e-05, "loss": 27.2668, "step": 1253, "task_loss": 1.0536291599273682 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2506281272248496, "compression/movement_sparsity/importance_threshold": -0.0038013533577363743, "compression/movement_sparsity/linear_layer_sparsity": 0.029259713331562832, "compression/movement_sparsity/model_sparsity": 0.028254552165216915, "compression_loss": 27.056703567504883, "distillation_loss": 0.48408210277557373, "epoch": 1.06, "learning_rate": 4.2428450670208916e-05, "loss": 27.5278, "step": 1254, "task_loss": 0.7247920036315918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.25398966409097584, "compression/movement_sparsity/importance_threshold": -0.0037911255066604413, "compression/movement_sparsity/linear_layer_sparsity": 0.03041721613231133, "compression/movement_sparsity/model_sparsity": 0.029372291183864578, "compression_loss": 27.419187545776367, "distillation_loss": 0.2873002588748932, "epoch": 1.06, "learning_rate": 4.242241275208308e-05, "loss": 27.8572, "step": 1255, "task_loss": 0.9619093537330627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2573451658835608, "compression/movement_sparsity/importance_threshold": -0.003780916017969296, "compression/movement_sparsity/linear_layer_sparsity": 0.031211651876883066, "compression/movement_sparsity/model_sparsity": 0.030139435616640065, "compression_loss": 27.781023025512695, "distillation_loss": 0.6032617688179016, "epoch": 1.06, "learning_rate": 4.241637483395725e-05, "loss": 28.2365, "step": 1256, "task_loss": 0.5064491629600525 }, { "compression/movement_sparsity/importance_regularization_factor": 0.26069463802495263, "compression/movement_sparsity/importance_threshold": -0.003770724875164838, "compression/movement_sparsity/linear_layer_sparsity": 0.032219232117945855, "compression/movement_sparsity/model_sparsity": 0.031112402376742967, "compression_loss": 28.142189025878906, "distillation_loss": 0.44703471660614014, "epoch": 1.06, "learning_rate": 4.2410336915831424e-05, "loss": 28.5133, "step": 1257, "task_loss": 0.6577385067939758 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2640380859375, "compression/movement_sparsity/importance_threshold": -0.0037605520617489674, "compression/movement_sparsity/linear_layer_sparsity": 0.03317983113852334, "compression/movement_sparsity/model_sparsity": 0.03204000186581538, "compression_loss": 28.50269317626953, "distillation_loss": 0.5529592633247375, "epoch": 1.06, "learning_rate": 4.240429899770559e-05, "loss": 28.9456, "step": 1258, "task_loss": 0.6998945474624634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.26737551504355084, "compression/movement_sparsity/importance_threshold": -0.003750397561223585, "compression/movement_sparsity/linear_layer_sparsity": 0.03410360832944129, "compression/movement_sparsity/model_sparsity": 0.03293204446835425, "compression_loss": 28.862529754638672, "distillation_loss": 0.20232579112052917, "epoch": 1.06, "learning_rate": 4.2398261079579765e-05, "loss": 29.1511, "step": 1259, "task_loss": 0.04054655879735947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27070693076545305, "compression/movement_sparsity/importance_threshold": -0.003740261357090592, "compression/movement_sparsity/linear_layer_sparsity": 0.035138053720187676, "compression/movement_sparsity/model_sparsity": 0.033930953477602256, "compression_loss": 29.22166633605957, "distillation_loss": 0.5860152244567871, "epoch": 1.07, "learning_rate": 4.239222316145393e-05, "loss": 29.6411, "step": 1260, "task_loss": 0.6404820084571838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27403233852555536, "compression/movement_sparsity/importance_threshold": -0.003730143432851887, "compression/movement_sparsity/linear_layer_sparsity": 0.03619793336050136, "compression/movement_sparsity/model_sparsity": 0.03495442299170001, "compression_loss": 29.58013153076172, "distillation_loss": 0.44030773639678955, "epoch": 1.07, "learning_rate": 4.23861852433281e-05, "loss": 30.0218, "step": 1261, "task_loss": 0.8194950819015503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27735174374620586, "compression/movement_sparsity/importance_threshold": -0.0037200437720093715, "compression/movement_sparsity/linear_layer_sparsity": 0.037563035919790225, "compression/movement_sparsity/model_sparsity": 0.036272630078530634, "compression_loss": 29.937925338745117, "distillation_loss": 0.3283558189868927, "epoch": 1.07, "learning_rate": 4.2380147325202274e-05, "loss": 30.3759, "step": 1262, "task_loss": 0.4912737309932709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2806651518497526, "compression/movement_sparsity/importance_threshold": -0.0037099623580649464, "compression/movement_sparsity/linear_layer_sparsity": 0.03880109839708661, "compression/movement_sparsity/model_sparsity": 0.03746816130100621, "compression_loss": 30.29505729675293, "distillation_loss": 0.34929248690605164, "epoch": 1.07, "learning_rate": 4.237410940707644e-05, "loss": 30.7296, "step": 1263, "task_loss": 0.3152148127555847 }, { "compression/movement_sparsity/importance_regularization_factor": 0.28397256825854333, "compression/movement_sparsity/importance_threshold": -0.0036998991745205126, "compression/movement_sparsity/linear_layer_sparsity": 0.03984663326373435, "compression/movement_sparsity/model_sparsity": 0.03847777882854314, "compression_loss": 30.651500701904297, "distillation_loss": 0.46013110876083374, "epoch": 1.07, "learning_rate": 4.236807148895061e-05, "loss": 31.1531, "step": 1264, "task_loss": 0.5476149916648865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2872739983949271, "compression/movement_sparsity/importance_threshold": -0.003689854204877969, "compression/movement_sparsity/linear_layer_sparsity": 0.041110833516488546, "compression/movement_sparsity/model_sparsity": 0.039698549913480354, "compression_loss": 31.00731086730957, "distillation_loss": 0.42440980672836304, "epoch": 1.07, "learning_rate": 4.236203357082478e-05, "loss": 31.4518, "step": 1265, "task_loss": 0.7424221038818359 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2905694476812517, "compression/movement_sparsity/importance_threshold": -0.0036798274326392162, "compression/movement_sparsity/linear_layer_sparsity": 0.04239200185978858, "compression/movement_sparsity/model_sparsity": 0.0409357061828532, "compression_loss": 31.36248207092285, "distillation_loss": 0.571715235710144, "epoch": 1.07, "learning_rate": 4.235599565269895e-05, "loss": 32.0213, "step": 1266, "task_loss": 1.1389743089675903 }, { "compression/movement_sparsity/importance_regularization_factor": 0.293858921539865, "compression/movement_sparsity/importance_threshold": -0.003669818841306157, "compression/movement_sparsity/linear_layer_sparsity": 0.04383340716777934, "compression/movement_sparsity/model_sparsity": 0.042327594784233084, "compression_loss": 31.717010498046875, "distillation_loss": 0.3843132555484772, "epoch": 1.07, "learning_rate": 4.234995773457312e-05, "loss": 32.2031, "step": 1267, "task_loss": 1.3387547731399536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29714242539311586, "compression/movement_sparsity/importance_threshold": -0.003659828414380689, "compression/movement_sparsity/linear_layer_sparsity": 0.04498444706966919, "compression/movement_sparsity/model_sparsity": 0.04343909292448011, "compression_loss": 32.070865631103516, "distillation_loss": 0.3448026478290558, "epoch": 1.07, "learning_rate": 4.234391981644729e-05, "loss": 32.6186, "step": 1268, "task_loss": 0.9032401442527771 }, { "compression/movement_sparsity/importance_regularization_factor": 0.30041996466335225, "compression/movement_sparsity/importance_threshold": -0.0036498561353647133, "compression/movement_sparsity/linear_layer_sparsity": 0.04615513799982295, "compression/movement_sparsity/model_sparsity": 0.04456956701971654, "compression_loss": 32.424102783203125, "distillation_loss": 0.5070450901985168, "epoch": 1.07, "learning_rate": 4.2337881898321464e-05, "loss": 32.7972, "step": 1269, "task_loss": 0.8359112739562988 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3036915447729217, "compression/movement_sparsity/importance_threshold": -0.0036399019877601324, "compression/movement_sparsity/linear_layer_sparsity": 0.04731974760448242, "compression/movement_sparsity/model_sparsity": 0.04569416870169775, "compression_loss": 32.77665328979492, "distillation_loss": 0.44396933913230896, "epoch": 1.07, "learning_rate": 4.233184398019563e-05, "loss": 33.1326, "step": 1270, "task_loss": 0.5468348264694214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3069571711441735, "compression/movement_sparsity/importance_threshold": -0.0036299659550688436, "compression/movement_sparsity/linear_layer_sparsity": 0.048803698342597936, "compression/movement_sparsity/model_sparsity": 0.047127141166792556, "compression_loss": 33.12855529785156, "distillation_loss": 0.3639471232891083, "epoch": 1.07, "learning_rate": 4.23258060620698e-05, "loss": 33.5157, "step": 1271, "task_loss": 0.41027218103408813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31021684919945514, "compression/movement_sparsity/importance_threshold": -0.0036200480207927497, "compression/movement_sparsity/linear_layer_sparsity": 0.050247977374988935, "compression/movement_sparsity/model_sparsity": 0.04852180477129892, "compression_loss": 33.479766845703125, "distillation_loss": 0.4434579312801361, "epoch": 1.08, "learning_rate": 4.231976814394397e-05, "loss": 33.8952, "step": 1272, "task_loss": 0.8820655941963196 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31347058436111475, "compression/movement_sparsity/importance_threshold": -0.003610148168433751, "compression/movement_sparsity/linear_layer_sparsity": 0.05193835930321651, "compression/movement_sparsity/model_sparsity": 0.05015411688006882, "compression_loss": 33.83030319213867, "distillation_loss": 0.33706918358802795, "epoch": 1.08, "learning_rate": 4.231373022581814e-05, "loss": 34.2512, "step": 1273, "task_loss": 0.6071690320968628 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31671838205150105, "compression/movement_sparsity/importance_threshold": -0.0036002663814937463, "compression/movement_sparsity/linear_layer_sparsity": 0.053383806904035824, "compression/movement_sparsity/model_sparsity": 0.05154990890908306, "compression_loss": 34.18016815185547, "distillation_loss": 0.5281939506530762, "epoch": 1.08, "learning_rate": 4.230769230769231e-05, "loss": 34.661, "step": 1274, "task_loss": 1.3885276317596436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31996024769296194, "compression/movement_sparsity/importance_threshold": -0.0035904026434746376, "compression/movement_sparsity/linear_layer_sparsity": 0.05498815596277065, "compression/movement_sparsity/model_sparsity": 0.053099143642095344, "compression_loss": 34.529396057128906, "distillation_loss": 0.41586706042289734, "epoch": 1.08, "learning_rate": 4.230165438956648e-05, "loss": 35.0392, "step": 1275, "task_loss": 0.5115939974784851 }, { "compression/movement_sparsity/importance_regularization_factor": 0.32319618670784567, "compression/movement_sparsity/importance_threshold": -0.0035805569378783244, "compression/movement_sparsity/linear_layer_sparsity": 0.05621701298265215, "compression/movement_sparsity/model_sparsity": 0.05428578564293754, "compression_loss": 34.87797546386719, "distillation_loss": 0.6999092698097229, "epoch": 1.08, "learning_rate": 4.229561647144065e-05, "loss": 35.4152, "step": 1276, "task_loss": 0.4918098449707031 }, { "compression/movement_sparsity/importance_regularization_factor": 0.32642620451849996, "compression/movement_sparsity/importance_threshold": -0.0035707292482067094, "compression/movement_sparsity/linear_layer_sparsity": 0.05768861028309691, "compression/movement_sparsity/model_sparsity": 0.0557068290489492, "compression_loss": 35.22592544555664, "distillation_loss": 0.3220058083534241, "epoch": 1.08, "learning_rate": 4.2289578553314815e-05, "loss": 35.6123, "step": 1277, "task_loss": 0.20578202605247498 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3296503065472739, "compression/movement_sparsity/importance_threshold": -0.0035609195579616895, "compression/movement_sparsity/linear_layer_sparsity": 0.05917731876409914, "compression/movement_sparsity/model_sparsity": 0.05714439581382603, "compression_loss": 35.57320022583008, "distillation_loss": 0.5208441615104675, "epoch": 1.08, "learning_rate": 4.228354063518899e-05, "loss": 36.0763, "step": 1278, "task_loss": 0.7269611954689026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33286849821651504, "compression/movement_sparsity/importance_threshold": -0.0035511278506451674, "compression/movement_sparsity/linear_layer_sparsity": 0.060484100219481, "compression/movement_sparsity/model_sparsity": 0.05840628530608555, "compression_loss": 35.9197998046875, "distillation_loss": 0.6241443753242493, "epoch": 1.08, "learning_rate": 4.227750271706316e-05, "loss": 36.9221, "step": 1279, "task_loss": 0.06261111795902252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33608078494857196, "compression/movement_sparsity/importance_threshold": -0.003541354109759043, "compression/movement_sparsity/linear_layer_sparsity": 0.06205791148490038, "compression/movement_sparsity/model_sparsity": 0.059926031312928014, "compression_loss": 36.26581954956055, "distillation_loss": 1.0040439367294312, "epoch": 1.08, "learning_rate": 4.2271464798937324e-05, "loss": 37.0246, "step": 1280, "task_loss": 1.213135838508606 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3392871721657921, "compression/movement_sparsity/importance_threshold": -0.0035315983188052187, "compression/movement_sparsity/linear_layer_sparsity": 0.06349304468071468, "compression/movement_sparsity/model_sparsity": 0.061311863268479976, "compression_loss": 36.61121368408203, "distillation_loss": 0.634764552116394, "epoch": 1.08, "learning_rate": 4.22654268808115e-05, "loss": 37.2573, "step": 1281, "task_loss": 1.138067603111267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3424876652905246, "compression/movement_sparsity/importance_threshold": -0.0035218604612855912, "compression/movement_sparsity/linear_layer_sparsity": 0.06481805818841177, "compression/movement_sparsity/model_sparsity": 0.06259135848596935, "compression_loss": 36.95594024658203, "distillation_loss": 0.6046814918518066, "epoch": 1.08, "learning_rate": 4.225938896268567e-05, "loss": 37.4981, "step": 1282, "task_loss": 1.0641181468963623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3456822697451173, "compression/movement_sparsity/importance_threshold": -0.0035121405207020635, "compression/movement_sparsity/linear_layer_sparsity": 0.06629898018794778, "compression/movement_sparsity/model_sparsity": 0.06402140625897235, "compression_loss": 37.30006408691406, "distillation_loss": 0.7174438834190369, "epoch": 1.08, "learning_rate": 4.225335104455984e-05, "loss": 38.0405, "step": 1283, "task_loss": 0.6762393116950989 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34887099095191787, "compression/movement_sparsity/importance_threshold": -0.0035024384805565365, "compression/movement_sparsity/linear_layer_sparsity": 0.06784382765017964, "compression/movement_sparsity/model_sparsity": 0.06551318345836987, "compression_loss": 37.643531799316406, "distillation_loss": 0.5281542539596558, "epoch": 1.09, "learning_rate": 4.2247313126434006e-05, "loss": 38.19, "step": 1284, "task_loss": 0.32907983660697937 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3520538343332754, "compression/movement_sparsity/importance_threshold": -0.003492754324350909, "compression/movement_sparsity/linear_layer_sparsity": 0.06892196319114384, "compression/movement_sparsity/model_sparsity": 0.06655428172676907, "compression_loss": 37.986392974853516, "distillation_loss": 0.3914589285850525, "epoch": 1.09, "learning_rate": 4.224127520830818e-05, "loss": 38.4515, "step": 1285, "task_loss": 0.6987112760543823 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35523080531153767, "compression/movement_sparsity/importance_threshold": -0.0034830880355870815, "compression/movement_sparsity/linear_layer_sparsity": 0.0699324290807745, "compression/movement_sparsity/model_sparsity": 0.06753003500453425, "compression_loss": 38.32862854003906, "distillation_loss": 0.5760637521743774, "epoch": 1.09, "learning_rate": 4.223523729018235e-05, "loss": 38.9482, "step": 1286, "task_loss": 0.5916048288345337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3584019093090525, "compression/movement_sparsity/importance_threshold": -0.003473439597766956, "compression/movement_sparsity/linear_layer_sparsity": 0.07135494650723005, "compression/movement_sparsity/model_sparsity": 0.06890368458121558, "compression_loss": 38.67019271850586, "distillation_loss": 0.5432088971138, "epoch": 1.09, "learning_rate": 4.2229199372056514e-05, "loss": 39.4064, "step": 1287, "task_loss": 0.1140303760766983 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3615671517481689, "compression/movement_sparsity/importance_threshold": -0.0034638089943924313, "compression/movement_sparsity/linear_layer_sparsity": 0.07262639665390686, "compression/movement_sparsity/model_sparsity": 0.07013145650391588, "compression_loss": 39.01117706298828, "distillation_loss": 0.5712504982948303, "epoch": 1.09, "learning_rate": 4.222316145393069e-05, "loss": 39.5572, "step": 1288, "task_loss": 1.0062147378921509 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3647265380512345, "compression/movement_sparsity/importance_threshold": -0.003454196208965409, "compression/movement_sparsity/linear_layer_sparsity": 0.07405844149030345, "compression/movement_sparsity/model_sparsity": 0.07151430619469705, "compression_loss": 39.35145950317383, "distillation_loss": 0.5996578931808472, "epoch": 1.09, "learning_rate": 4.221712353580486e-05, "loss": 40.1684, "step": 1289, "task_loss": 0.24615256488323212 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36788007364059727, "compression/movement_sparsity/importance_threshold": -0.00344460122498779, "compression/movement_sparsity/linear_layer_sparsity": 0.07515756356630676, "compression/movement_sparsity/model_sparsity": 0.07257567004609465, "compression_loss": 39.6911506652832, "distillation_loss": 0.5459069013595581, "epoch": 1.09, "learning_rate": 4.221108561767902e-05, "loss": 40.3805, "step": 1290, "task_loss": 0.6144179105758667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37102776393860626, "compression/movement_sparsity/importance_threshold": -0.0034350240259614726, "compression/movement_sparsity/linear_layer_sparsity": 0.07670222024185647, "compression/movement_sparsity/model_sparsity": 0.07406726301291947, "compression_loss": 40.0301628112793, "distillation_loss": 0.4156564772129059, "epoch": 1.09, "learning_rate": 4.22050476995532e-05, "loss": 40.6101, "step": 1291, "task_loss": 1.1754850149154663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37416961436760904, "compression/movement_sparsity/importance_threshold": -0.003425464595388359, "compression/movement_sparsity/linear_layer_sparsity": 0.07797237065426095, "compression/movement_sparsity/model_sparsity": 0.07529377985121816, "compression_loss": 40.368595123291016, "distillation_loss": 0.7206194996833801, "epoch": 1.09, "learning_rate": 4.219900978142737e-05, "loss": 41.0608, "step": 1292, "task_loss": 0.6590170860290527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.377305630349954, "compression/movement_sparsity/importance_threshold": -0.0034159229167703485, "compression/movement_sparsity/linear_layer_sparsity": 0.07923505653772539, "compression/movement_sparsity/model_sparsity": 0.07651308859010945, "compression_loss": 40.70634460449219, "distillation_loss": 0.387393057346344, "epoch": 1.09, "learning_rate": 4.219297186330153e-05, "loss": 41.1842, "step": 1293, "task_loss": 0.3437434136867523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.38043581730798876, "compression/movement_sparsity/importance_threshold": -0.003406398973609344, "compression/movement_sparsity/linear_layer_sparsity": 0.08078425632114436, "compression/movement_sparsity/model_sparsity": 0.07800906859507199, "compression_loss": 41.04345703125, "distillation_loss": 0.599915623664856, "epoch": 1.09, "learning_rate": 4.2186933945175705e-05, "loss": 41.6076, "step": 1294, "task_loss": 0.8313719034194946 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3835601806640625, "compression/movement_sparsity/importance_threshold": -0.003396892749407243, "compression/movement_sparsity/linear_layer_sparsity": 0.08215526134341297, "compression/movement_sparsity/model_sparsity": 0.0793329753771209, "compression_loss": 41.37990951538086, "distillation_loss": 0.47436046600341797, "epoch": 1.09, "learning_rate": 4.218089602704988e-05, "loss": 41.9317, "step": 1295, "task_loss": 0.5215071439743042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3866787258405228, "compression/movement_sparsity/importance_threshold": -0.003387404227665948, "compression/movement_sparsity/linear_layer_sparsity": 0.08378419803581295, "compression/movement_sparsity/model_sparsity": 0.08090595308294155, "compression_loss": 41.715736389160156, "distillation_loss": 0.6345131993293762, "epoch": 1.1, "learning_rate": 4.217485810892404e-05, "loss": 42.2746, "step": 1296, "task_loss": 0.2941673994064331 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3897914582597178, "compression/movement_sparsity/importance_threshold": -0.003377933391887359, "compression/movement_sparsity/linear_layer_sparsity": 0.0852309215225693, "compression/movement_sparsity/model_sparsity": 0.0823029771672858, "compression_loss": 42.05088806152344, "distillation_loss": 0.7908411026000977, "epoch": 1.1, "learning_rate": 4.216882019079821e-05, "loss": 42.5886, "step": 1297, "task_loss": 0.5576940178871155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3928983833439961, "compression/movement_sparsity/importance_threshold": -0.003368480225573375, "compression/movement_sparsity/linear_layer_sparsity": 0.08685205980933543, "compression/movement_sparsity/model_sparsity": 0.0838684243666968, "compression_loss": 42.3853759765625, "distillation_loss": 0.7869074940681458, "epoch": 1.1, "learning_rate": 4.216278227267239e-05, "loss": 42.8715, "step": 1298, "task_loss": 0.4372589588165283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39599950651570576, "compression/movement_sparsity/importance_threshold": -0.003359044712225898, "compression/movement_sparsity/linear_layer_sparsity": 0.08861368863918728, "compression/movement_sparsity/model_sparsity": 0.08556953582683907, "compression_loss": 42.71921920776367, "distillation_loss": 0.6729541420936584, "epoch": 1.1, "learning_rate": 4.2156744354546554e-05, "loss": 43.2173, "step": 1299, "task_loss": 0.7337606549263 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3990948331971949, "compression/movement_sparsity/importance_threshold": -0.003349626835346828, "compression/movement_sparsity/linear_layer_sparsity": 0.09031417033740244, "compression/movement_sparsity/model_sparsity": 0.08721160074742694, "compression_loss": 43.05238723754883, "distillation_loss": 0.481242835521698, "epoch": 1.1, "learning_rate": 4.215070643642072e-05, "loss": 43.4554, "step": 1300, "task_loss": 0.44292038679122925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40218436881081115, "compression/movement_sparsity/importance_threshold": -0.0033402265784380665, "compression/movement_sparsity/linear_layer_sparsity": 0.0917581751139378, "compression/movement_sparsity/model_sparsity": 0.08860599951761004, "compression_loss": 43.384883880615234, "distillation_loss": 0.48726382851600647, "epoch": 1.1, "learning_rate": 4.2144668518294896e-05, "loss": 43.8635, "step": 1301, "task_loss": 0.5337143540382385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40526811877890373, "compression/movement_sparsity/importance_threshold": -0.003330843925001512, "compression/movement_sparsity/linear_layer_sparsity": 0.09375038157336435, "compression/movement_sparsity/model_sparsity": 0.09052976755641136, "compression_loss": 43.716739654541016, "distillation_loss": 0.8898061513900757, "epoch": 1.1, "learning_rate": 4.213863060016906e-05, "loss": 44.3364, "step": 1302, "task_loss": 1.9387022256851196 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40834608852382054, "compression/movement_sparsity/importance_threshold": -0.0033214788585390655, "compression/movement_sparsity/linear_layer_sparsity": 0.09538371828362195, "compression/movement_sparsity/model_sparsity": 0.09210699412594017, "compression_loss": 44.0479850769043, "distillation_loss": 0.47995662689208984, "epoch": 1.1, "learning_rate": 4.213259268204323e-05, "loss": 44.602, "step": 1303, "task_loss": 0.4069124758243561 }, { "compression/movement_sparsity/importance_regularization_factor": 0.411418283467909, "compression/movement_sparsity/importance_threshold": -0.00331213136255263, "compression/movement_sparsity/linear_layer_sparsity": 0.09698178330601268, "compression/movement_sparsity/model_sparsity": 0.09365016069858872, "compression_loss": 44.37854766845703, "distillation_loss": 0.40206286311149597, "epoch": 1.1, "learning_rate": 4.2126554763917404e-05, "loss": 44.9244, "step": 1304, "task_loss": 1.0917549133300781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4144847090335183, "compression/movement_sparsity/importance_threshold": -0.0033028014205441023, "compression/movement_sparsity/linear_layer_sparsity": 0.09879221775399813, "compression/movement_sparsity/model_sparsity": 0.09539840115373806, "compression_loss": 44.708492279052734, "distillation_loss": 0.38143932819366455, "epoch": 1.1, "learning_rate": 4.212051684579157e-05, "loss": 45.2843, "step": 1305, "task_loss": 0.8777618408203125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4175453706429964, "compression/movement_sparsity/importance_threshold": -0.0032934890160153845, "compression/movement_sparsity/linear_layer_sparsity": 0.10060379692207662, "compression/movement_sparsity/model_sparsity": 0.09714774700432367, "compression_loss": 45.03784942626953, "distillation_loss": 0.23496291041374207, "epoch": 1.1, "learning_rate": 4.211447892766574e-05, "loss": 45.3763, "step": 1306, "task_loss": 0.5907947421073914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4206002737186907, "compression/movement_sparsity/importance_threshold": -0.0032841941324683785, "compression/movement_sparsity/linear_layer_sparsity": 0.10225726162730356, "compression/movement_sparsity/model_sparsity": 0.09874441011027367, "compression_loss": 45.36653137207031, "distillation_loss": 0.6294994354248047, "epoch": 1.1, "learning_rate": 4.210844100953991e-05, "loss": 45.9615, "step": 1307, "task_loss": 1.5922635793685913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42364942368295055, "compression/movement_sparsity/importance_threshold": -0.0032749167534049817, "compression/movement_sparsity/linear_layer_sparsity": 0.10414325952559747, "compression/movement_sparsity/model_sparsity": 0.10056561817875301, "compression_loss": 45.694557189941406, "distillation_loss": 0.2052983194589615, "epoch": 1.11, "learning_rate": 4.2102403091414086e-05, "loss": 46.1253, "step": 1308, "task_loss": 0.5349974632263184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42669282595812374, "compression/movement_sparsity/importance_threshold": -0.0032656568623270956, "compression/movement_sparsity/linear_layer_sparsity": 0.10596450919362864, "compression/movement_sparsity/model_sparsity": 0.102324302317868, "compression_loss": 46.02192687988281, "distillation_loss": 0.433124303817749, "epoch": 1.11, "learning_rate": 4.209636517328825e-05, "loss": 46.5536, "step": 1309, "task_loss": 0.5030676126480103 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42973048596655783, "compression/movement_sparsity/importance_threshold": -0.0032564144427366234, "compression/movement_sparsity/linear_layer_sparsity": 0.10777036476124191, "compression/movement_sparsity/model_sparsity": 0.10406812119127222, "compression_loss": 46.34867858886719, "distillation_loss": 0.4955660402774811, "epoch": 1.11, "learning_rate": 4.209032725516242e-05, "loss": 46.8545, "step": 1310, "task_loss": 0.5676338076591492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4327624091306019, "compression/movement_sparsity/importance_threshold": -0.003247189478135462, "compression/movement_sparsity/linear_layer_sparsity": 0.10968913027219918, "compression/movement_sparsity/model_sparsity": 0.10592097120411496, "compression_loss": 46.674800872802734, "distillation_loss": 0.5332227945327759, "epoch": 1.11, "learning_rate": 4.2084289337036595e-05, "loss": 47.2699, "step": 1311, "task_loss": 0.4865185022354126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4357886008726038, "compression/movement_sparsity/importance_threshold": -0.003237981952025513, "compression/movement_sparsity/linear_layer_sparsity": 0.11165584286122024, "compression/movement_sparsity/model_sparsity": 0.10782012116538756, "compression_loss": 47.000335693359375, "distillation_loss": 0.5922881364822388, "epoch": 1.11, "learning_rate": 4.207825141891076e-05, "loss": 47.4329, "step": 1312, "task_loss": 0.7178448438644409 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4388090666149118, "compression/movement_sparsity/importance_threshold": -0.003228791847908677, "compression/movement_sparsity/linear_layer_sparsity": 0.1137394599897433, "compression/movement_sparsity/model_sparsity": 0.10983215963558982, "compression_loss": 47.32521438598633, "distillation_loss": 0.5975414514541626, "epoch": 1.11, "learning_rate": 4.207221350078493e-05, "loss": 47.7347, "step": 1313, "task_loss": 1.5485409498214722 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4418238117798735, "compression/movement_sparsity/importance_threshold": -0.0032196191492868554, "compression/movement_sparsity/linear_layer_sparsity": 0.11582783486115308, "compression/movement_sparsity/model_sparsity": 0.1118487924055741, "compression_loss": 47.64946365356445, "distillation_loss": 0.7130229473114014, "epoch": 1.11, "learning_rate": 4.20661755826591e-05, "loss": 48.2814, "step": 1314, "task_loss": 0.5504848957061768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.44483284178983795, "compression/movement_sparsity/importance_threshold": -0.003210463839661948, "compression/movement_sparsity/linear_layer_sparsity": 0.11770981431495371, "compression/movement_sparsity/model_sparsity": 0.11366612007549068, "compression_loss": 47.97305679321289, "distillation_loss": 0.48751258850097656, "epoch": 1.11, "learning_rate": 4.206013766453327e-05, "loss": 48.421, "step": 1315, "task_loss": 1.4994933605194092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.44783616206715304, "compression/movement_sparsity/importance_threshold": -0.003201325902535854, "compression/movement_sparsity/linear_layer_sparsity": 0.11952941844785113, "compression/movement_sparsity/model_sparsity": 0.11542321520866601, "compression_loss": 48.29603576660156, "distillation_loss": 0.5306200981140137, "epoch": 1.11, "learning_rate": 4.205409974640744e-05, "loss": 48.7778, "step": 1316, "task_loss": 0.36964669823646545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.450833778034167, "compression/movement_sparsity/importance_threshold": -0.0031922053214104747, "compression/movement_sparsity/linear_layer_sparsity": 0.1215770245901139, "compression/movement_sparsity/model_sparsity": 0.11740047978076877, "compression_loss": 48.618343353271484, "distillation_loss": 0.4229336380958557, "epoch": 1.11, "learning_rate": 4.204806182828161e-05, "loss": 49.0859, "step": 1317, "task_loss": 0.42890480160713196 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45382569511322746, "compression/movement_sparsity/importance_threshold": -0.003183102079787712, "compression/movement_sparsity/linear_layer_sparsity": 0.1235310856372733, "compression/movement_sparsity/model_sparsity": 0.11928741281956334, "compression_loss": 48.940059661865234, "distillation_loss": 0.4254857003688812, "epoch": 1.11, "learning_rate": 4.204202391015578e-05, "loss": 49.4511, "step": 1318, "task_loss": 0.2547108232975006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4568119187266834, "compression/movement_sparsity/importance_threshold": -0.003174016161169465, "compression/movement_sparsity/linear_layer_sparsity": 0.12543979907491354, "compression/movement_sparsity/model_sparsity": 0.12113055607873129, "compression_loss": 49.261146545410156, "distillation_loss": 0.5012710690498352, "epoch": 1.11, "learning_rate": 4.203598599202995e-05, "loss": 49.7314, "step": 1319, "task_loss": 0.288612425327301 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4597924542968828, "compression/movement_sparsity/importance_threshold": -0.003164947549057633, "compression/movement_sparsity/linear_layer_sparsity": 0.12745320670439664, "compression/movement_sparsity/model_sparsity": 0.1230747969621753, "compression_loss": 49.581607818603516, "distillation_loss": 0.6273993253707886, "epoch": 1.12, "learning_rate": 4.202994807390412e-05, "loss": 50.4047, "step": 1320, "task_loss": 0.9354586005210876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46276730724617354, "compression/movement_sparsity/importance_threshold": -0.0031558962269541196, "compression/movement_sparsity/linear_layer_sparsity": 0.12927622114923792, "compression/movement_sparsity/model_sparsity": 0.12483518525258787, "compression_loss": 49.90154266357422, "distillation_loss": 0.5354970693588257, "epoch": 1.12, "learning_rate": 4.202391015577829e-05, "loss": 50.5532, "step": 1321, "task_loss": 0.5257239937782288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4657364829969042, "compression/movement_sparsity/importance_threshold": -0.003146862178360822, "compression/movement_sparsity/linear_layer_sparsity": 0.13102729709073205, "compression/movement_sparsity/model_sparsity": 0.12652610634855196, "compression_loss": 50.220909118652344, "distillation_loss": 0.7406328320503235, "epoch": 1.12, "learning_rate": 4.201787223765246e-05, "loss": 51.2674, "step": 1322, "task_loss": 1.5844182968139648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46869998697142295, "compression/movement_sparsity/importance_threshold": -0.0031378453867796424, "compression/movement_sparsity/linear_layer_sparsity": 0.13292184461722087, "compression/movement_sparsity/model_sparsity": 0.128355570339196, "compression_loss": 50.539695739746094, "distillation_loss": 0.627805233001709, "epoch": 1.12, "learning_rate": 4.201183431952663e-05, "loss": 51.1895, "step": 1323, "task_loss": 0.5349506139755249 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4716578245920773, "compression/movement_sparsity/importance_threshold": -0.0031288458357124826, "compression/movement_sparsity/linear_layer_sparsity": 0.13435700166137043, "compression/movement_sparsity/model_sparsity": 0.12974142532381955, "compression_loss": 50.85783386230469, "distillation_loss": 0.4615139663219452, "epoch": 1.12, "learning_rate": 4.20057964014008e-05, "loss": 51.3966, "step": 1324, "task_loss": 0.758313775062561 }, { "compression/movement_sparsity/importance_regularization_factor": 0.47461000128121655, "compression/movement_sparsity/importance_threshold": -0.003119863508661239, "compression/movement_sparsity/linear_layer_sparsity": 0.13604071797988956, "compression/movement_sparsity/model_sparsity": 0.13136730080708028, "compression_loss": 51.1754035949707, "distillation_loss": 1.027340292930603, "epoch": 1.12, "learning_rate": 4.199975848327497e-05, "loss": 51.8103, "step": 1325, "task_loss": 0.8412054181098938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4775565224611882, "compression/movement_sparsity/importance_threshold": -0.0031108983891278154, "compression/movement_sparsity/linear_layer_sparsity": 0.13766493270190572, "compression/movement_sparsity/model_sparsity": 0.1329357187567263, "compression_loss": 51.4923095703125, "distillation_loss": 0.5421846508979797, "epoch": 1.12, "learning_rate": 4.1993720565149136e-05, "loss": 52.0206, "step": 1326, "task_loss": 0.5908591747283936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48049739355434024, "compression/movement_sparsity/importance_threshold": -0.0031019504606141124, "compression/movement_sparsity/linear_layer_sparsity": 0.13948147232372074, "compression/movement_sparsity/model_sparsity": 0.13468985465420244, "compression_loss": 51.80863952636719, "distillation_loss": 1.5145933628082275, "epoch": 1.12, "learning_rate": 4.198768264702331e-05, "loss": 52.7521, "step": 1327, "task_loss": 0.8470127582550049 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4834326199830216, "compression/movement_sparsity/importance_threshold": -0.003093019706622028, "compression/movement_sparsity/linear_layer_sparsity": 0.1410719058788245, "compression/movement_sparsity/model_sparsity": 0.13622565192394248, "compression_loss": 52.12431335449219, "distillation_loss": 0.4998517334461212, "epoch": 1.12, "learning_rate": 4.198164472889748e-05, "loss": 52.6055, "step": 1328, "task_loss": 0.38575848937034607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48636220716958023, "compression/movement_sparsity/importance_threshold": -0.003084106110653464, "compression/movement_sparsity/linear_layer_sparsity": 0.14282931355333328, "compression/movement_sparsity/model_sparsity": 0.1379226872384135, "compression_loss": 52.43937301635742, "distillation_loss": 0.7924885749816895, "epoch": 1.12, "learning_rate": 4.197560681077165e-05, "loss": 53.2151, "step": 1329, "task_loss": 0.2703063488006592 }, { "compression/movement_sparsity/importance_regularization_factor": 0.489286160536364, "compression/movement_sparsity/importance_threshold": -0.003075209656210321, "compression/movement_sparsity/linear_layer_sparsity": 0.1446762121059492, "compression/movement_sparsity/model_sparsity": 0.13970613914402252, "compression_loss": 52.753841400146484, "distillation_loss": 0.5064437389373779, "epoch": 1.12, "learning_rate": 4.196956889264582e-05, "loss": 53.269, "step": 1330, "task_loss": 1.033895492553711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49220448550572093, "compression/movement_sparsity/importance_threshold": -0.003066330326794501, "compression/movement_sparsity/linear_layer_sparsity": 0.1465540061769096, "compression/movement_sparsity/model_sparsity": 0.14151942521187524, "compression_loss": 53.06763458251953, "distillation_loss": 0.5594974756240845, "epoch": 1.13, "learning_rate": 4.1963530974519986e-05, "loss": 53.6294, "step": 1331, "task_loss": 1.134873628616333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4951171875, "compression/movement_sparsity/importance_threshold": -0.003057468105907901, "compression/movement_sparsity/linear_layer_sparsity": 0.14849322163536238, "compression/movement_sparsity/model_sparsity": 0.1433920226536056, "compression_loss": 53.380760192871094, "distillation_loss": 0.6891230344772339, "epoch": 1.13, "learning_rate": 4.195749305639416e-05, "loss": 54.2164, "step": 1332, "task_loss": 1.2063493728637695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4980242719415489, "compression/movement_sparsity/importance_threshold": -0.003048622977052424, "compression/movement_sparsity/linear_layer_sparsity": 0.1505457643830264, "compression/movement_sparsity/model_sparsity": 0.14537405424352728, "compression_loss": 53.69331741333008, "distillation_loss": 0.3187189996242523, "epoch": 1.13, "learning_rate": 4.195145513826833e-05, "loss": 54.1606, "step": 1333, "task_loss": 1.4558395147323608 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5009257442527161, "compression/movement_sparsity/importance_threshold": -0.003039794923729969, "compression/movement_sparsity/linear_layer_sparsity": 0.1525055371064834, "compression/movement_sparsity/model_sparsity": 0.14726650274496747, "compression_loss": 54.005271911621094, "distillation_loss": 0.35164231061935425, "epoch": 1.13, "learning_rate": 4.1945417220142494e-05, "loss": 54.5363, "step": 1334, "task_loss": 0.9146254062652588 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5038216098558489, "compression/movement_sparsity/importance_threshold": -0.003030983929442439, "compression/movement_sparsity/linear_layer_sparsity": 0.15439517187590623, "compression/movement_sparsity/model_sparsity": 0.14909122274686412, "compression_loss": 54.31656265258789, "distillation_loss": 0.684345006942749, "epoch": 1.13, "learning_rate": 4.193937930201667e-05, "loss": 54.9945, "step": 1335, "task_loss": 0.17185698449611664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5067118741732968, "compression/movement_sparsity/importance_threshold": -0.003022189977691731, "compression/movement_sparsity/linear_layer_sparsity": 0.1564990720135779, "compression/movement_sparsity/model_sparsity": 0.1511228474424529, "compression_loss": 54.62724685668945, "distillation_loss": 0.37548866868019104, "epoch": 1.13, "learning_rate": 4.1933341383890835e-05, "loss": 55.2774, "step": 1336, "task_loss": 0.45851272344589233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5095965426274073, "compression/movement_sparsity/importance_threshold": -0.003013413051979748, "compression/movement_sparsity/linear_layer_sparsity": 0.15842554053682795, "compression/movement_sparsity/model_sparsity": 0.15298313584541892, "compression_loss": 54.93730545043945, "distillation_loss": 0.4902113378047943, "epoch": 1.13, "learning_rate": 4.1927303465765e-05, "loss": 55.5883, "step": 1337, "task_loss": 0.7764456272125244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5124756206405283, "compression/movement_sparsity/importance_threshold": -0.00300465313580839, "compression/movement_sparsity/linear_layer_sparsity": 0.16029244784273683, "compression/movement_sparsity/model_sparsity": 0.1547859091420912, "compression_loss": 55.24681854248047, "distillation_loss": 0.8601358532905579, "epoch": 1.13, "learning_rate": 4.1921265547639176e-05, "loss": 55.9548, "step": 1338, "task_loss": 1.9680184125900269 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5153491136350088, "compression/movement_sparsity/importance_threshold": -0.0029959102126795563, "compression/movement_sparsity/linear_layer_sparsity": 0.16236557170374033, "compression/movement_sparsity/model_sparsity": 0.15678781482079426, "compression_loss": 55.555747985839844, "distillation_loss": 1.0419814586639404, "epoch": 1.13, "learning_rate": 4.191522762951335e-05, "loss": 56.1963, "step": 1339, "task_loss": 1.3514772653579712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5182170270331966, "compression/movement_sparsity/importance_threshold": -0.002987184266095148, "compression/movement_sparsity/linear_layer_sparsity": 0.16425079453113792, "compression/movement_sparsity/model_sparsity": 0.15860827444444697, "compression_loss": 55.86402893066406, "distillation_loss": 0.7991650104522705, "epoch": 1.13, "learning_rate": 4.190918971138752e-05, "loss": 56.3824, "step": 1340, "task_loss": 1.3947752714157104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5210793662574396, "compression/movement_sparsity/importance_threshold": -0.0029784752795570663, "compression/movement_sparsity/linear_layer_sparsity": 0.16629516922397136, "compression/movement_sparsity/model_sparsity": 0.1605824185773494, "compression_loss": 56.17171096801758, "distillation_loss": 0.41190028190612793, "epoch": 1.13, "learning_rate": 4.1903151793261685e-05, "loss": 56.7545, "step": 1341, "task_loss": 0.29174506664276123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5239361367300863, "compression/movement_sparsity/importance_threshold": -0.0029697832365672104, "compression/movement_sparsity/linear_layer_sparsity": 0.16845872597232525, "compression/movement_sparsity/model_sparsity": 0.16267165049551827, "compression_loss": 56.478721618652344, "distillation_loss": 0.28209346532821655, "epoch": 1.13, "learning_rate": 4.189711387513586e-05, "loss": 57.1874, "step": 1342, "task_loss": 0.03726118057966232 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5267873438734849, "compression/movement_sparsity/importance_threshold": -0.002961108120627482, "compression/movement_sparsity/linear_layer_sparsity": 0.1705467908153765, "compression/movement_sparsity/model_sparsity": 0.1646879838875719, "compression_loss": 56.78512191772461, "distillation_loss": 0.6510779857635498, "epoch": 1.14, "learning_rate": 4.1891075957010026e-05, "loss": 57.3976, "step": 1343, "task_loss": 0.38953909277915955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5296329931099835, "compression/movement_sparsity/importance_threshold": -0.0029524499152397813, "compression/movement_sparsity/linear_layer_sparsity": 0.17251473159366404, "compression/movement_sparsity/model_sparsity": 0.1665883198460313, "compression_loss": 57.0909309387207, "distillation_loss": 0.7534204721450806, "epoch": 1.14, "learning_rate": 4.188503803888419e-05, "loss": 57.9751, "step": 1344, "task_loss": 0.9102391004562378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5324730898619304, "compression/movement_sparsity/importance_threshold": -0.0029438086039060077, "compression/movement_sparsity/linear_layer_sparsity": 0.17450223993104205, "compression/movement_sparsity/model_sparsity": 0.16850755115772958, "compression_loss": 57.39614486694336, "distillation_loss": 0.651016116142273, "epoch": 1.14, "learning_rate": 4.187900012075837e-05, "loss": 58.0904, "step": 1345, "task_loss": 1.4844597578048706 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5353076395516739, "compression/movement_sparsity/importance_threshold": -0.002935184170128062, "compression/movement_sparsity/linear_layer_sparsity": 0.17653915009493545, "compression/movement_sparsity/model_sparsity": 0.17047448719122463, "compression_loss": 57.70078659057617, "distillation_loss": 0.8724663257598877, "epoch": 1.14, "learning_rate": 4.1872962202632534e-05, "loss": 58.6078, "step": 1346, "task_loss": 0.7334779500961304 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5381366476015617, "compression/movement_sparsity/importance_threshold": -0.0029265765974078473, "compression/movement_sparsity/linear_layer_sparsity": 0.17854991055920338, "compression/movement_sparsity/model_sparsity": 0.17241617184772223, "compression_loss": 58.00483703613281, "distillation_loss": 0.44548577070236206, "epoch": 1.14, "learning_rate": 4.18669242845067e-05, "loss": 58.6765, "step": 1347, "task_loss": 0.40221214294433594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5409601194339426, "compression/movement_sparsity/importance_threshold": -0.002917985869247259, "compression/movement_sparsity/linear_layer_sparsity": 0.18062875802067213, "compression/movement_sparsity/model_sparsity": 0.1744236045036067, "compression_loss": 58.308311462402344, "distillation_loss": 0.6168889999389648, "epoch": 1.14, "learning_rate": 4.1860886366380875e-05, "loss": 59.0554, "step": 1348, "task_loss": 0.8127673864364624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5437780604711646, "compression/movement_sparsity/importance_threshold": -0.0029094119691482016, "compression/movement_sparsity/linear_layer_sparsity": 0.18277366537084352, "compression/movement_sparsity/model_sparsity": 0.1764948276877929, "compression_loss": 58.61111831665039, "distillation_loss": 0.3418865203857422, "epoch": 1.14, "learning_rate": 4.185484844825505e-05, "loss": 59.0452, "step": 1349, "task_loss": 0.024086998775601387 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5465904761355758, "compression/movement_sparsity/importance_threshold": -0.0029008548806125738, "compression/movement_sparsity/linear_layer_sparsity": 0.18469504227451303, "compression/movement_sparsity/model_sparsity": 0.17835019938397464, "compression_loss": 58.913307189941406, "distillation_loss": 0.7119177579879761, "epoch": 1.14, "learning_rate": 4.184881053012921e-05, "loss": 59.7898, "step": 1350, "task_loss": 1.1302666664123535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.549397371849524, "compression/movement_sparsity/importance_threshold": -0.002892314587142278, "compression/movement_sparsity/linear_layer_sparsity": 0.18644762066112927, "compression/movement_sparsity/model_sparsity": 0.18004257131144885, "compression_loss": 59.214942932128906, "distillation_loss": 0.4487518072128296, "epoch": 1.14, "learning_rate": 4.1842772612003383e-05, "loss": 60.0115, "step": 1351, "task_loss": 1.062807321548462 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5521987530353583, "compression/movement_sparsity/importance_threshold": -0.002883791072239212, "compression/movement_sparsity/linear_layer_sparsity": 0.18873137301925266, "compression/movement_sparsity/model_sparsity": 0.1822478697504267, "compression_loss": 59.51594924926758, "distillation_loss": 0.7475280165672302, "epoch": 1.14, "learning_rate": 4.183673469387756e-05, "loss": 60.3057, "step": 1352, "task_loss": 1.2702032327651978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5549946251154263, "compression/movement_sparsity/importance_threshold": -0.002875284319405278, "compression/movement_sparsity/linear_layer_sparsity": 0.19094346112988453, "compression/movement_sparsity/model_sparsity": 0.18438396582927935, "compression_loss": 59.81637954711914, "distillation_loss": 0.8748681545257568, "epoch": 1.14, "learning_rate": 4.183069677575172e-05, "loss": 60.6006, "step": 1353, "task_loss": 0.9828934073448181 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5577849935120762, "compression/movement_sparsity/importance_threshold": -0.0028667943121423755, "compression/movement_sparsity/linear_layer_sparsity": 0.19306390008806712, "compression/movement_sparsity/model_sparsity": 0.18643156118601514, "compression_loss": 60.11622619628906, "distillation_loss": 0.756619930267334, "epoch": 1.14, "learning_rate": 4.182465885762589e-05, "loss": 60.8456, "step": 1354, "task_loss": 0.6199924945831299 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5605698636476562, "compression/movement_sparsity/importance_threshold": -0.0028583210339524065, "compression/movement_sparsity/linear_layer_sparsity": 0.19529774980463444, "compression/movement_sparsity/model_sparsity": 0.18858867129269283, "compression_loss": 60.41544723510742, "distillation_loss": 0.7377138137817383, "epoch": 1.15, "learning_rate": 4.1818620939500066e-05, "loss": 61.1416, "step": 1355, "task_loss": 0.194289892911911 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5633492409445147, "compression/movement_sparsity/importance_threshold": -0.0028498644683372697, "compression/movement_sparsity/linear_layer_sparsity": 0.19737585796770973, "compression/movement_sparsity/model_sparsity": 0.190595390047358, "compression_loss": 60.71408462524414, "distillation_loss": 0.6436585783958435, "epoch": 1.15, "learning_rate": 4.181258302137423e-05, "loss": 61.5809, "step": 1356, "task_loss": 0.6387341618537903 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5661231308249999, "compression/movement_sparsity/importance_threshold": -0.002841424598798866, "compression/movement_sparsity/linear_layer_sparsity": 0.19943336116911026, "compression/movement_sparsity/model_sparsity": 0.19258221168417025, "compression_loss": 61.012107849121094, "distillation_loss": 0.579427182674408, "epoch": 1.15, "learning_rate": 4.18065451032484e-05, "loss": 61.7557, "step": 1357, "task_loss": 0.4528934061527252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5688915387114595, "compression/movement_sparsity/importance_threshold": -0.002833001408839097, "compression/movement_sparsity/linear_layer_sparsity": 0.2011861780390792, "compression/movement_sparsity/model_sparsity": 0.19427481390236034, "compression_loss": 61.30952072143555, "distillation_loss": 0.8669440150260925, "epoch": 1.15, "learning_rate": 4.1800507185122574e-05, "loss": 62.0841, "step": 1358, "task_loss": 1.4366141557693481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5716544700262425, "compression/movement_sparsity/importance_threshold": -0.0028245948819598616, "compression/movement_sparsity/linear_layer_sparsity": 0.2033785316522737, "compression/movement_sparsity/model_sparsity": 0.19639185342447305, "compression_loss": 61.60634994506836, "distillation_loss": 0.8479097485542297, "epoch": 1.15, "learning_rate": 4.179446926699674e-05, "loss": 62.3573, "step": 1359, "task_loss": 0.36244797706604004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5744119301916966, "compression/movement_sparsity/importance_threshold": -0.0028162050016630608, "compression/movement_sparsity/linear_layer_sparsity": 0.20544922298307952, "compression/movement_sparsity/model_sparsity": 0.198391410137874, "compression_loss": 61.90259552001953, "distillation_loss": 1.0806055068969727, "epoch": 1.15, "learning_rate": 4.178843134887091e-05, "loss": 62.7493, "step": 1360, "task_loss": 1.2633005380630493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5771639246301696, "compression/movement_sparsity/importance_threshold": -0.0028078317514505964, "compression/movement_sparsity/linear_layer_sparsity": 0.20752065361227873, "compression/movement_sparsity/model_sparsity": 0.20039168075249425, "compression_loss": 62.19822692871094, "distillation_loss": 0.8248847723007202, "epoch": 1.15, "learning_rate": 4.178239343074508e-05, "loss": 63.0963, "step": 1361, "task_loss": 1.3339134454727173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5799104587640107, "compression/movement_sparsity/importance_threshold": -0.002799475114824366, "compression/movement_sparsity/linear_layer_sparsity": 0.2099370623353511, "compression/movement_sparsity/model_sparsity": 0.20272507840218637, "compression_loss": 62.493255615234375, "distillation_loss": 0.9832310676574707, "epoch": 1.15, "learning_rate": 4.177635551261925e-05, "loss": 63.5547, "step": 1362, "task_loss": 0.8253710865974426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5826515380155675, "compression/movement_sparsity/importance_threshold": -0.002791135075286273, "compression/movement_sparsity/linear_layer_sparsity": 0.2119308547090732, "compression/movement_sparsity/model_sparsity": 0.20465037787424836, "compression_loss": 62.78776931762695, "distillation_loss": 0.9118959307670593, "epoch": 1.15, "learning_rate": 4.177031759449342e-05, "loss": 63.5636, "step": 1363, "task_loss": 0.42578306794166565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.585387167807188, "compression/movement_sparsity/importance_threshold": -0.0027828116163382166, "compression/movement_sparsity/linear_layer_sparsity": 0.21404406762166847, "compression/movement_sparsity/model_sparsity": 0.20669099542229266, "compression_loss": 63.08169937133789, "distillation_loss": 0.632866382598877, "epoch": 1.15, "learning_rate": 4.176427967636759e-05, "loss": 63.7584, "step": 1364, "task_loss": 0.8126760125160217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5881173535612209, "compression/movement_sparsity/importance_threshold": -0.002774504721482097, "compression/movement_sparsity/linear_layer_sparsity": 0.21595244718261492, "compression/movement_sparsity/model_sparsity": 0.20853381627445836, "compression_loss": 63.37501907348633, "distillation_loss": 0.6368722915649414, "epoch": 1.15, "learning_rate": 4.1758241758241765e-05, "loss": 64.1358, "step": 1365, "task_loss": 1.117645025253296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5908421007000141, "compression/movement_sparsity/importance_threshold": -0.0027662143742198143, "compression/movement_sparsity/linear_layer_sparsity": 0.21804513860270885, "compression/movement_sparsity/model_sparsity": 0.21055461730640027, "compression_loss": 63.667762756347656, "distillation_loss": 0.7271023392677307, "epoch": 1.15, "learning_rate": 4.1752203840115925e-05, "loss": 64.5768, "step": 1366, "task_loss": 0.9656323194503784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5935614146459156, "compression/movement_sparsity/importance_threshold": -0.0027579405580532707, "compression/movement_sparsity/linear_layer_sparsity": 0.2199053802989093, "compression/movement_sparsity/model_sparsity": 0.2123509539775634, "compression_loss": 63.9599494934082, "distillation_loss": 1.3375957012176514, "epoch": 1.16, "learning_rate": 4.17461659219901e-05, "loss": 64.8956, "step": 1367, "task_loss": 1.154859185218811 }, { "compression/movement_sparsity/importance_regularization_factor": 0.596275300821274, "compression/movement_sparsity/importance_threshold": -0.0027496832564843643, "compression/movement_sparsity/linear_layer_sparsity": 0.22189633472073408, "compression/movement_sparsity/model_sparsity": 0.21427351299010627, "compression_loss": 64.25151824951172, "distillation_loss": 0.8366067409515381, "epoch": 1.16, "learning_rate": 4.174012800386427e-05, "loss": 65.0679, "step": 1368, "task_loss": 0.6697847843170166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5989837646484375, "compression/movement_sparsity/importance_threshold": -0.0027414424530149972, "compression/movement_sparsity/linear_layer_sparsity": 0.22413169880659115, "compression/movement_sparsity/model_sparsity": 0.21643208544282988, "compression_loss": 64.54248046875, "distillation_loss": 0.4663401246070862, "epoch": 1.16, "learning_rate": 4.173409008573844e-05, "loss": 65.3785, "step": 1369, "task_loss": 0.5330495834350586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6016868115497541, "compression/movement_sparsity/importance_threshold": -0.0027332181311470693, "compression/movement_sparsity/linear_layer_sparsity": 0.22613675951872914, "compression/movement_sparsity/model_sparsity": 0.2183682661512177, "compression_loss": 64.83292388916016, "distillation_loss": 0.6931779384613037, "epoch": 1.16, "learning_rate": 4.172805216761261e-05, "loss": 65.659, "step": 1370, "task_loss": 0.9911800622940063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6043844469475719, "compression/movement_sparsity/importance_threshold": -0.0027250102743824815, "compression/movement_sparsity/linear_layer_sparsity": 0.22821125465901076, "compression/movement_sparsity/model_sparsity": 0.22037149600153713, "compression_loss": 65.12277221679688, "distillation_loss": 1.086794376373291, "epoch": 1.16, "learning_rate": 4.172201424948678e-05, "loss": 65.8935, "step": 1371, "task_loss": 1.3339197635650635 }, { "compression/movement_sparsity/importance_regularization_factor": 0.607076676264239, "compression/movement_sparsity/importance_threshold": -0.0027168188662231346, "compression/movement_sparsity/linear_layer_sparsity": 0.2302616152839974, "compression/movement_sparsity/model_sparsity": 0.22235142043140843, "compression_loss": 65.41204071044922, "distillation_loss": 0.8879855275154114, "epoch": 1.16, "learning_rate": 4.171597633136095e-05, "loss": 66.1692, "step": 1372, "task_loss": 0.9749023914337158 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6097635049221041, "compression/movement_sparsity/importance_threshold": -0.0027086438901709276, "compression/movement_sparsity/linear_layer_sparsity": 0.23222244925837401, "compression/movement_sparsity/model_sparsity": 0.2242448937265343, "compression_loss": 65.70072937011719, "distillation_loss": 0.7153295278549194, "epoch": 1.16, "learning_rate": 4.1709938413235116e-05, "loss": 66.5496, "step": 1373, "task_loss": 0.6534342765808105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6124449383435151, "compression/movement_sparsity/importance_threshold": -0.0027004853297277623, "compression/movement_sparsity/linear_layer_sparsity": 0.23412299464118366, "compression/movement_sparsity/model_sparsity": 0.226080149528683, "compression_loss": 65.98876953125, "distillation_loss": 0.7891640663146973, "epoch": 1.16, "learning_rate": 4.170390049510929e-05, "loss": 66.6855, "step": 1374, "task_loss": 0.8584902882575989 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6151209819508199, "compression/movement_sparsity/importance_threshold": -0.002692343168395539, "compression/movement_sparsity/linear_layer_sparsity": 0.2361486126183259, "compression/movement_sparsity/model_sparsity": 0.2280361812967806, "compression_loss": 66.27623748779297, "distillation_loss": 0.5732203125953674, "epoch": 1.16, "learning_rate": 4.169786257698346e-05, "loss": 66.9091, "step": 1375, "task_loss": 0.5100113153457642 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6177916411663673, "compression/movement_sparsity/importance_threshold": -0.002684217389676157, "compression/movement_sparsity/linear_layer_sparsity": 0.2381610543902305, "compression/movement_sparsity/model_sparsity": 0.22997948950282526, "compression_loss": 66.56306457519531, "distillation_loss": 0.5590233206748962, "epoch": 1.16, "learning_rate": 4.1691824658857624e-05, "loss": 67.377, "step": 1376, "task_loss": 1.5021125078201294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6204569214125051, "compression/movement_sparsity/importance_threshold": -0.002676107977071518, "compression/movement_sparsity/linear_layer_sparsity": 0.24036286386836025, "compression/movement_sparsity/model_sparsity": 0.232105660051823, "compression_loss": 66.84934997558594, "distillation_loss": 0.6142886281013489, "epoch": 1.16, "learning_rate": 4.16857867407318e-05, "loss": 67.6273, "step": 1377, "task_loss": 0.8872668147087097 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6231168281115813, "compression/movement_sparsity/importance_threshold": -0.0026680149140835222, "compression/movement_sparsity/linear_layer_sparsity": 0.24237684385788988, "compression/movement_sparsity/model_sparsity": 0.23405045363298516, "compression_loss": 67.13507080078125, "distillation_loss": 1.406994342803955, "epoch": 1.16, "learning_rate": 4.1679748822605965e-05, "loss": 68.2786, "step": 1378, "task_loss": 0.8127667903900146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6257713666859449, "compression/movement_sparsity/importance_threshold": -0.002659938184214069, "compression/movement_sparsity/linear_layer_sparsity": 0.2444149345143792, "compression/movement_sparsity/model_sparsity": 0.23601852960552386, "compression_loss": 67.42024230957031, "distillation_loss": 0.8874384760856628, "epoch": 1.17, "learning_rate": 4.167371090448014e-05, "loss": 68.2482, "step": 1379, "task_loss": 0.263242244720459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6284205425579432, "compression/movement_sparsity/importance_threshold": -0.0026518777709650613, "compression/movement_sparsity/linear_layer_sparsity": 0.24646090704567586, "compression/movement_sparsity/model_sparsity": 0.23799421668622275, "compression_loss": 67.7048110961914, "distillation_loss": 0.6260169744491577, "epoch": 1.17, "learning_rate": 4.1667672986354306e-05, "loss": 68.6636, "step": 1380, "task_loss": 0.3249625861644745 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6310643611499249, "compression/movement_sparsity/importance_threshold": -0.0026438336578383974, "compression/movement_sparsity/linear_layer_sparsity": 0.24854806565198678, "compression/movement_sparsity/model_sparsity": 0.240009674973556, "compression_loss": 67.98883819580078, "distillation_loss": 1.2066599130630493, "epoch": 1.17, "learning_rate": 4.166163506822848e-05, "loss": 69.0842, "step": 1381, "task_loss": 1.6980273723602295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.633702827884238, "compression/movement_sparsity/importance_threshold": -0.0026358058283359777, "compression/movement_sparsity/linear_layer_sparsity": 0.2504901548348398, "compression/movement_sparsity/model_sparsity": 0.24188504741841285, "compression_loss": 68.27230834960938, "distillation_loss": 1.112046480178833, "epoch": 1.17, "learning_rate": 4.165559715010265e-05, "loss": 69.295, "step": 1382, "task_loss": 0.630577564239502 }, { "compression/movement_sparsity/importance_regularization_factor": 0.636335948183231, "compression/movement_sparsity/importance_threshold": -0.0026277942659597036, "compression/movement_sparsity/linear_layer_sparsity": 0.25255428787344586, "compression/movement_sparsity/model_sparsity": 0.24387827113712685, "compression_loss": 68.55519104003906, "distillation_loss": 1.0415713787078857, "epoch": 1.17, "learning_rate": 4.1649559231976815e-05, "loss": 69.6207, "step": 1383, "task_loss": 1.6775456666946411 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6389637274692515, "compression/movement_sparsity/importance_threshold": -0.0026197989542114768, "compression/movement_sparsity/linear_layer_sparsity": 0.25465263134899924, "compression/movement_sparsity/model_sparsity": 0.24590453005903537, "compression_loss": 68.83745574951172, "distillation_loss": 1.1353254318237305, "epoch": 1.17, "learning_rate": 4.164352131385099e-05, "loss": 69.6061, "step": 1384, "task_loss": 0.8452408313751221 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6415861711646482, "compression/movement_sparsity/importance_threshold": -0.0026118198765931943, "compression/movement_sparsity/linear_layer_sparsity": 0.2566804314488188, "compression/movement_sparsity/model_sparsity": 0.2478626689871834, "compression_loss": 69.11917114257812, "distillation_loss": 1.077697992324829, "epoch": 1.17, "learning_rate": 4.1637483395725156e-05, "loss": 70.2182, "step": 1385, "task_loss": 1.9962106943130493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6442032846917692, "compression/movement_sparsity/importance_threshold": -0.0026038570166067596, "compression/movement_sparsity/linear_layer_sparsity": 0.2585530027343547, "compression/movement_sparsity/model_sparsity": 0.2496709116883581, "compression_loss": 69.40036010742188, "distillation_loss": 0.6430842280387878, "epoch": 1.17, "learning_rate": 4.163144547759932e-05, "loss": 70.3305, "step": 1386, "task_loss": 0.7763282060623169 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6468150734729627, "compression/movement_sparsity/importance_threshold": -0.0025959103577540715, "compression/movement_sparsity/linear_layer_sparsity": 0.2606312659116093, "compression/movement_sparsity/model_sparsity": 0.2516777801319886, "compression_loss": 69.68089294433594, "distillation_loss": 0.2612699866294861, "epoch": 1.17, "learning_rate": 4.16254075594735e-05, "loss": 70.4575, "step": 1387, "task_loss": 0.2202429622411728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6494215429305765, "compression/movement_sparsity/importance_threshold": -0.002587979883537032, "compression/movement_sparsity/linear_layer_sparsity": 0.26289143226614897, "compression/movement_sparsity/model_sparsity": 0.25386030281916483, "compression_loss": 69.9608154296875, "distillation_loss": 0.889277994632721, "epoch": 1.17, "learning_rate": 4.1619369641347664e-05, "loss": 70.9411, "step": 1388, "task_loss": 0.849382758140564 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6520226984869595, "compression/movement_sparsity/importance_threshold": -0.00258006557745754, "compression/movement_sparsity/linear_layer_sparsity": 0.264844098185695, "compression/movement_sparsity/model_sparsity": 0.25574588865727144, "compression_loss": 70.24024963378906, "distillation_loss": 0.6564585566520691, "epoch": 1.17, "learning_rate": 4.161333172322184e-05, "loss": 71.1757, "step": 1389, "task_loss": 0.9101492762565613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6546185455644595, "compression/movement_sparsity/importance_threshold": -0.0025721674230174962, "compression/movement_sparsity/linear_layer_sparsity": 0.2669554390039714, "compression/movement_sparsity/model_sparsity": 0.257784698423196, "compression_loss": 70.51902770996094, "distillation_loss": 0.8258087635040283, "epoch": 1.17, "learning_rate": 4.1607293805096005e-05, "loss": 71.4319, "step": 1390, "task_loss": 0.5144898891448975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6572090895854248, "compression/movement_sparsity/importance_threshold": -0.0025642854037188014, "compression/movement_sparsity/linear_layer_sparsity": 0.26895404874141843, "compression/movement_sparsity/model_sparsity": 0.259714649767719, "compression_loss": 70.79725646972656, "distillation_loss": 0.8017131090164185, "epoch": 1.18, "learning_rate": 4.160125588697017e-05, "loss": 71.6644, "step": 1391, "task_loss": 1.0256181955337524 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6597943359722029, "compression/movement_sparsity/importance_threshold": -0.0025564195030633573, "compression/movement_sparsity/linear_layer_sparsity": 0.271268446210366, "compression/movement_sparsity/model_sparsity": 0.2619495405636888, "compression_loss": 71.0748291015625, "distillation_loss": 1.2390258312225342, "epoch": 1.18, "learning_rate": 4.1595217968844346e-05, "loss": 72.124, "step": 1392, "task_loss": 1.5512627363204956 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6623742901471432, "compression/movement_sparsity/importance_threshold": -0.0025485697045530623, "compression/movement_sparsity/linear_layer_sparsity": 0.2734800931267953, "compression/movement_sparsity/model_sparsity": 0.26408521060471707, "compression_loss": 71.35183715820312, "distillation_loss": 0.7582095861434937, "epoch": 1.18, "learning_rate": 4.1589180050718514e-05, "loss": 72.4596, "step": 1393, "task_loss": 0.7546876668930054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6649489575325933, "compression/movement_sparsity/importance_threshold": -0.002540735991689817, "compression/movement_sparsity/linear_layer_sparsity": 0.27561110882167095, "compression/movement_sparsity/model_sparsity": 0.26614301935470264, "compression_loss": 71.62828826904297, "distillation_loss": 1.2509907484054565, "epoch": 1.18, "learning_rate": 4.158314213259268e-05, "loss": 72.6355, "step": 1394, "task_loss": 0.7522532939910889 }, { "compression/movement_sparsity/importance_regularization_factor": 0.667518343550901, "compression/movement_sparsity/importance_threshold": -0.0025329183479755246, "compression/movement_sparsity/linear_layer_sparsity": 0.27791360597131787, "compression/movement_sparsity/model_sparsity": 0.26836641864394944, "compression_loss": 71.90416717529297, "distillation_loss": 0.6605136394500732, "epoch": 1.18, "learning_rate": 4.1577104214466855e-05, "loss": 72.7973, "step": 1395, "task_loss": 0.8413481116294861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6700824536244152, "compression/movement_sparsity/importance_threshold": -0.0025251167569120825, "compression/movement_sparsity/linear_layer_sparsity": 0.27994866788922773, "compression/movement_sparsity/model_sparsity": 0.2703315699243964, "compression_loss": 72.17947387695312, "distillation_loss": 2.018920421600342, "epoch": 1.18, "learning_rate": 4.157106629634102e-05, "loss": 73.5889, "step": 1396, "task_loss": 1.2018566131591797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6726412931754838, "compression/movement_sparsity/importance_threshold": -0.0025173312020013916, "compression/movement_sparsity/linear_layer_sparsity": 0.2822247172350584, "compression/movement_sparsity/model_sparsity": 0.2725294299732509, "compression_loss": 72.45419311523438, "distillation_loss": 1.0406205654144287, "epoch": 1.18, "learning_rate": 4.1565028378215196e-05, "loss": 73.4745, "step": 1397, "task_loss": 1.533910870552063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6751948676264545, "compression/movement_sparsity/importance_threshold": -0.0025095616667453547, "compression/movement_sparsity/linear_layer_sparsity": 0.2842650496350632, "compression/movement_sparsity/model_sparsity": 0.274499670678519, "compression_loss": 72.72845458984375, "distillation_loss": 0.6194800138473511, "epoch": 1.18, "learning_rate": 4.155899046008936e-05, "loss": 73.5565, "step": 1398, "task_loss": 0.40264472365379333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6777431823996767, "compression/movement_sparsity/importance_threshold": -0.0025018081346458686, "compression/movement_sparsity/linear_layer_sparsity": 0.28627910116959865, "compression/movement_sparsity/model_sparsity": 0.27644453334689595, "compression_loss": 73.00200653076172, "distillation_loss": 2.0512421131134033, "epoch": 1.18, "learning_rate": 4.155295254196354e-05, "loss": 74.4878, "step": 1399, "task_loss": 1.1340610980987549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6802862429174975, "compression/movement_sparsity/importance_threshold": -0.0024940705892048365, "compression/movement_sparsity/linear_layer_sparsity": 0.2884824369411858, "compression/movement_sparsity/model_sparsity": 0.27857217775647536, "compression_loss": 73.27507781982422, "distillation_loss": 1.5700030326843262, "epoch": 1.18, "learning_rate": 4.1546914623837704e-05, "loss": 74.572, "step": 1400, "task_loss": 1.5543832778930664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6828240546022653, "compression/movement_sparsity/importance_threshold": -0.002486349013924159, "compression/movement_sparsity/linear_layer_sparsity": 0.29054994451923283, "compression/movement_sparsity/model_sparsity": 0.2805686600888192, "compression_loss": 73.5475845336914, "distillation_loss": 0.9511449337005615, "epoch": 1.18, "learning_rate": 4.154087670571187e-05, "loss": 74.491, "step": 1401, "task_loss": 0.8736863136291504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6853566228763288, "compression/movement_sparsity/importance_threshold": -0.0024786433923057343, "compression/movement_sparsity/linear_layer_sparsity": 0.29287848405099653, "compression/movement_sparsity/model_sparsity": 0.28281720712424135, "compression_loss": 73.81948852539062, "distillation_loss": 1.6658793687820435, "epoch": 1.19, "learning_rate": 4.1534838787586045e-05, "loss": 74.9614, "step": 1402, "task_loss": 1.5326954126358032 }, { "compression/movement_sparsity/importance_regularization_factor": 0.687883953162036, "compression/movement_sparsity/importance_threshold": -0.0024709537078514638, "compression/movement_sparsity/linear_layer_sparsity": 0.2951152313402993, "compression/movement_sparsity/model_sparsity": 0.2849771152631171, "compression_loss": 74.09085083007812, "distillation_loss": 0.8680563569068909, "epoch": 1.19, "learning_rate": 4.152880086946021e-05, "loss": 74.8887, "step": 1403, "task_loss": 1.6420655250549316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6904060508817345, "compression/movement_sparsity/importance_threshold": -0.00246327994406325, "compression/movement_sparsity/linear_layer_sparsity": 0.29723178301983266, "compression/movement_sparsity/model_sparsity": 0.2870209568811839, "compression_loss": 74.36160278320312, "distillation_loss": 1.452422857284546, "epoch": 1.19, "learning_rate": 4.152276295133438e-05, "loss": 75.4644, "step": 1404, "task_loss": 1.1320195198059082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6929229214577731, "compression/movement_sparsity/importance_threshold": -0.0024556220844429905, "compression/movement_sparsity/linear_layer_sparsity": 0.2993860389174306, "compression/movement_sparsity/model_sparsity": 0.28910120746143303, "compression_loss": 74.63177490234375, "distillation_loss": 0.9171569347381592, "epoch": 1.19, "learning_rate": 4.1516725033208554e-05, "loss": 75.7188, "step": 1405, "task_loss": 0.7533453106880188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6954345703125, "compression/movement_sparsity/importance_threshold": -0.002447980112492587, "compression/movement_sparsity/linear_layer_sparsity": 0.3015433831744462, "compression/movement_sparsity/model_sparsity": 0.2911844403064529, "compression_loss": 74.90142059326172, "distillation_loss": 0.7895047664642334, "epoch": 1.19, "learning_rate": 4.151068711508272e-05, "loss": 75.871, "step": 1406, "task_loss": 1.1812800168991089 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6979410028682633, "compression/movement_sparsity/importance_threshold": -0.0024403540117139398, "compression/movement_sparsity/linear_layer_sparsity": 0.3038659844707272, "compression/movement_sparsity/model_sparsity": 0.29342725310304935, "compression_loss": 75.1703872680664, "distillation_loss": 0.8484163284301758, "epoch": 1.19, "learning_rate": 4.150464919695689e-05, "loss": 76.3005, "step": 1407, "task_loss": 1.1036725044250488 }, { "compression/movement_sparsity/importance_regularization_factor": 0.700442224547411, "compression/movement_sparsity/importance_threshold": -0.0024327437656089497, "compression/movement_sparsity/linear_layer_sparsity": 0.30607543734031156, "compression/movement_sparsity/model_sparsity": 0.2955608044694914, "compression_loss": 75.4388656616211, "distillation_loss": 1.0059813261032104, "epoch": 1.19, "learning_rate": 4.149861127883106e-05, "loss": 76.683, "step": 1408, "task_loss": 1.337117314338684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7029382407722911, "compression/movement_sparsity/importance_threshold": -0.002425149357679518, "compression/movement_sparsity/linear_layer_sparsity": 0.30832575433238396, "compression/movement_sparsity/model_sparsity": 0.29773381615010136, "compression_loss": 75.70683288574219, "distillation_loss": 0.8014774918556213, "epoch": 1.19, "learning_rate": 4.1492573360705236e-05, "loss": 76.7041, "step": 1409, "task_loss": 0.8850807547569275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7054290569652526, "compression/movement_sparsity/importance_threshold": -0.002417570771427543, "compression/movement_sparsity/linear_layer_sparsity": 0.3104834324660934, "compression/movement_sparsity/model_sparsity": 0.2998173714021235, "compression_loss": 75.97415161132812, "distillation_loss": 0.9386861324310303, "epoch": 1.19, "learning_rate": 4.1486535442579396e-05, "loss": 77.2367, "step": 1410, "task_loss": 1.493424654006958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7079146785486432, "compression/movement_sparsity/importance_threshold": -0.002410007990354926, "compression/movement_sparsity/linear_layer_sparsity": 0.31251996105662244, "compression/movement_sparsity/model_sparsity": 0.3017839389704732, "compression_loss": 76.24102783203125, "distillation_loss": 1.3480424880981445, "epoch": 1.19, "learning_rate": 4.148049752445357e-05, "loss": 77.3146, "step": 1411, "task_loss": 1.812322974205017 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7103951109448107, "compression/movement_sparsity/importance_threshold": -0.0024024609979635693, "compression/movement_sparsity/linear_layer_sparsity": 0.31477026612452724, "compression/movement_sparsity/model_sparsity": 0.3039569391365473, "compression_loss": 76.50728607177734, "distillation_loss": 1.0416315793991089, "epoch": 1.19, "learning_rate": 4.1474459606327744e-05, "loss": 77.9003, "step": 1412, "task_loss": 1.787137746810913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7128703595761041, "compression/movement_sparsity/importance_threshold": -0.00239492977775537, "compression/movement_sparsity/linear_layer_sparsity": 0.3169212786485282, "compression/movement_sparsity/model_sparsity": 0.3060340577630603, "compression_loss": 76.77301025390625, "distillation_loss": 0.9747606515884399, "epoch": 1.19, "learning_rate": 4.146842168820191e-05, "loss": 77.9852, "step": 1413, "task_loss": 1.4809799194335938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7153404298648711, "compression/movement_sparsity/importance_threshold": -0.002387414313232231, "compression/movement_sparsity/linear_layer_sparsity": 0.3189390147508631, "compression/movement_sparsity/model_sparsity": 0.30798247842299775, "compression_loss": 77.03817749023438, "distillation_loss": 0.716783344745636, "epoch": 1.2, "learning_rate": 4.146238377007608e-05, "loss": 77.8551, "step": 1414, "task_loss": 0.7942771911621094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7178053272334597, "compression/movement_sparsity/importance_threshold": -0.0023799145878960534, "compression/movement_sparsity/linear_layer_sparsity": 0.3210096941575013, "compression/movement_sparsity/model_sparsity": 0.30998202362186295, "compression_loss": 77.30272674560547, "distillation_loss": 0.9607566595077515, "epoch": 1.2, "learning_rate": 4.145634585195025e-05, "loss": 78.2777, "step": 1415, "task_loss": 0.6636053323745728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7202650571042188, "compression/movement_sparsity/importance_threshold": -0.0023724305852487345, "compression/movement_sparsity/linear_layer_sparsity": 0.3232173941744431, "compression/movement_sparsity/model_sparsity": 0.3121138823515432, "compression_loss": 77.56659698486328, "distillation_loss": 1.8384191989898682, "epoch": 1.2, "learning_rate": 4.145030793382442e-05, "loss": 79.1871, "step": 1416, "task_loss": 1.33530592918396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7227196248994963, "compression/movement_sparsity/importance_threshold": -0.0023649622887921768, "compression/movement_sparsity/linear_layer_sparsity": 0.3252054271751971, "compression/movement_sparsity/model_sparsity": 0.3140336203028164, "compression_loss": 77.83009338378906, "distillation_loss": 1.251970887184143, "epoch": 1.2, "learning_rate": 4.144427001569859e-05, "loss": 79.0636, "step": 1417, "task_loss": 1.932105302810669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7251690360416397, "compression/movement_sparsity/importance_threshold": -0.002357509682028282, "compression/movement_sparsity/linear_layer_sparsity": 0.32744786229246226, "compression/movement_sparsity/model_sparsity": 0.31619902087526613, "compression_loss": 78.09297180175781, "distillation_loss": 1.906305193901062, "epoch": 1.2, "learning_rate": 4.143823209757276e-05, "loss": 79.7876, "step": 1418, "task_loss": 1.1763521432876587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7276132959529984, "compression/movement_sparsity/importance_threshold": -0.002350072748458948, "compression/movement_sparsity/linear_layer_sparsity": 0.3294561544540296, "compression/movement_sparsity/model_sparsity": 0.3181383220228543, "compression_loss": 78.35538482666016, "distillation_loss": 0.9107580184936523, "epoch": 1.2, "learning_rate": 4.143219417944693e-05, "loss": 79.6057, "step": 1419, "task_loss": 1.141257643699646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7300524100559197, "compression/movement_sparsity/importance_threshold": -0.002342651471586077, "compression/movement_sparsity/linear_layer_sparsity": 0.33159490893370086, "compression/movement_sparsity/model_sparsity": 0.3202036037065705, "compression_loss": 78.61719512939453, "distillation_loss": 1.711940884590149, "epoch": 1.2, "learning_rate": 4.1426156261321095e-05, "loss": 79.7711, "step": 1420, "task_loss": 1.276822566986084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7324863837727519, "compression/movement_sparsity/importance_threshold": -0.002335245834911569, "compression/movement_sparsity/linear_layer_sparsity": 0.33356784593822786, "compression/movement_sparsity/model_sparsity": 0.32210876425552787, "compression_loss": 78.87846374511719, "distillation_loss": 0.9947202205657959, "epoch": 1.2, "learning_rate": 4.142011834319527e-05, "loss": 79.9571, "step": 1421, "task_loss": 0.8035528659820557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7349152225258437, "compression/movement_sparsity/importance_threshold": -0.0023278558219373235, "compression/movement_sparsity/linear_layer_sparsity": 0.3355727993328571, "compression/movement_sparsity/model_sparsity": 0.3240448413330935, "compression_loss": 79.13907623291016, "distillation_loss": 1.3028619289398193, "epoch": 1.2, "learning_rate": 4.141408042506944e-05, "loss": 80.3732, "step": 1422, "task_loss": 1.2800740003585815 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7373389317375428, "compression/movement_sparsity/importance_threshold": -0.002320481416165242, "compression/movement_sparsity/linear_layer_sparsity": 0.3376931905943692, "compression/movement_sparsity/model_sparsity": 0.32609239063168616, "compression_loss": 79.39927673339844, "distillation_loss": 1.0575711727142334, "epoch": 1.2, "learning_rate": 4.1408042506943604e-05, "loss": 80.5298, "step": 1423, "task_loss": 0.9868329763412476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7397575168301977, "compression/movement_sparsity/importance_threshold": -0.002313122601097225, "compression/movement_sparsity/linear_layer_sparsity": 0.33998081830697424, "compression/movement_sparsity/model_sparsity": 0.3283014312947972, "compression_loss": 79.65879821777344, "distillation_loss": 2.0688130855560303, "epoch": 1.2, "learning_rate": 4.140200458881778e-05, "loss": 81.0386, "step": 1424, "task_loss": 1.3961021900177002 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7421709832261562, "compression/movement_sparsity/importance_threshold": -0.0023057793602351733, "compression/movement_sparsity/linear_layer_sparsity": 0.34222296724421614, "compression/movement_sparsity/model_sparsity": 0.33046655551838794, "compression_loss": 79.9178695678711, "distillation_loss": 2.3836803436279297, "epoch": 1.2, "learning_rate": 4.139596667069195e-05, "loss": 81.5482, "step": 1425, "task_loss": 1.466278314590454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.744579336347767, "compression/movement_sparsity/importance_threshold": -0.0022984516770809854, "compression/movement_sparsity/linear_layer_sparsity": 0.34427266011581514, "compression/movement_sparsity/model_sparsity": 0.3324458351342547, "compression_loss": 80.17637634277344, "distillation_loss": 0.723200798034668, "epoch": 1.21, "learning_rate": 4.138992875256611e-05, "loss": 81.2356, "step": 1426, "task_loss": 0.7641627788543701 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7469825816173778, "compression/movement_sparsity/importance_threshold": -0.002291139535136564, "compression/movement_sparsity/linear_layer_sparsity": 0.346427345283448, "compression/movement_sparsity/model_sparsity": 0.33452650023779246, "compression_loss": 80.43431854248047, "distillation_loss": 1.9269773960113525, "epoch": 1.21, "learning_rate": 4.1383890834440286e-05, "loss": 81.867, "step": 1427, "task_loss": 1.6321303844451904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7493807244573374, "compression/movement_sparsity/importance_threshold": -0.0022838429179038084, "compression/movement_sparsity/linear_layer_sparsity": 0.34862209565433727, "compression/movement_sparsity/model_sparsity": 0.3366458541815998, "compression_loss": 80.69171142578125, "distillation_loss": 1.542504072189331, "epoch": 1.21, "learning_rate": 4.137785291631446e-05, "loss": 81.9669, "step": 1428, "task_loss": 0.8136503100395203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7517737702899931, "compression/movement_sparsity/importance_threshold": -0.0022765618088846206, "compression/movement_sparsity/linear_layer_sparsity": 0.3507870952269751, "compression/movement_sparsity/model_sparsity": 0.33873647935859985, "compression_loss": 80.94857788085938, "distillation_loss": 1.6139310598373413, "epoch": 1.21, "learning_rate": 4.137181499818863e-05, "loss": 82.1961, "step": 1429, "task_loss": 1.1871012449264526 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7541617245376939, "compression/movement_sparsity/importance_threshold": -0.002269296191580899, "compression/movement_sparsity/linear_layer_sparsity": 0.35298265644126364, "compression/movement_sparsity/model_sparsity": 0.34085661629084124, "compression_loss": 81.20484924316406, "distillation_loss": 2.0173540115356445, "epoch": 1.21, "learning_rate": 4.1365777080062794e-05, "loss": 82.7368, "step": 1430, "task_loss": 0.774440586566925 }, { "compression/movement_sparsity/importance_regularization_factor": 0.756544592622788, "compression/movement_sparsity/importance_threshold": -0.002262046049494544, "compression/movement_sparsity/linear_layer_sparsity": 0.3549282513294016, "compression/movement_sparsity/model_sparsity": 0.3427353740092217, "compression_loss": 81.46064758300781, "distillation_loss": 1.1124191284179688, "epoch": 1.21, "learning_rate": 4.135973916193697e-05, "loss": 82.6803, "step": 1431, "task_loss": 1.066011905670166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7589223799676228, "compression/movement_sparsity/importance_threshold": -0.0022548113661274584, "compression/movement_sparsity/linear_layer_sparsity": 0.3571622918326511, "compression/movement_sparsity/model_sparsity": 0.3448926683484721, "compression_loss": 81.71576690673828, "distillation_loss": 1.8398789167404175, "epoch": 1.21, "learning_rate": 4.1353701243811135e-05, "loss": 83.3577, "step": 1432, "task_loss": 1.8267942667007446 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7612950919945474, "compression/movement_sparsity/importance_threshold": -0.0022475921249815404, "compression/movement_sparsity/linear_layer_sparsity": 0.35917944365077187, "compression/movement_sparsity/model_sparsity": 0.34684052479615557, "compression_loss": 81.9704360961914, "distillation_loss": 1.6999833583831787, "epoch": 1.21, "learning_rate": 4.13476633256853e-05, "loss": 83.3603, "step": 1433, "task_loss": 1.4439703226089478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7636627341259096, "compression/movement_sparsity/importance_threshold": -0.002240388309558691, "compression/movement_sparsity/linear_layer_sparsity": 0.3610948823189587, "compression/movement_sparsity/model_sparsity": 0.3486901622535116, "compression_loss": 82.22450256347656, "distillation_loss": 1.1816179752349854, "epoch": 1.21, "learning_rate": 4.1341625407559477e-05, "loss": 83.564, "step": 1434, "task_loss": 0.3605434000492096 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7660253117840572, "compression/movement_sparsity/importance_threshold": -0.0022331999033608123, "compression/movement_sparsity/linear_layer_sparsity": 0.36294585893690606, "compression/movement_sparsity/model_sparsity": 0.3504775521303624, "compression_loss": 82.47810363769531, "distillation_loss": 1.076633334159851, "epoch": 1.21, "learning_rate": 4.1335587489433644e-05, "loss": 83.4269, "step": 1435, "task_loss": 0.5194324254989624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7683828303913391, "compression/movement_sparsity/importance_threshold": -0.002226026889889802, "compression/movement_sparsity/linear_layer_sparsity": 0.3652759486104624, "compression/movement_sparsity/model_sparsity": 0.35272759605543785, "compression_loss": 82.73101806640625, "distillation_loss": 1.6702191829681396, "epoch": 1.21, "learning_rate": 4.132954957130781e-05, "loss": 83.8605, "step": 1436, "task_loss": 0.9430710673332214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7707352953701033, "compression/movement_sparsity/importance_threshold": -0.002218869252647562, "compression/movement_sparsity/linear_layer_sparsity": 0.36739728188121773, "compression/movement_sparsity/model_sparsity": 0.35477605500235826, "compression_loss": 82.9834213256836, "distillation_loss": 1.8774333000183105, "epoch": 1.21, "learning_rate": 4.1323511653181985e-05, "loss": 84.3656, "step": 1437, "task_loss": 1.7696537971496582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7730827121426974, "compression/movement_sparsity/importance_threshold": -0.0022117269751359943, "compression/movement_sparsity/linear_layer_sparsity": 0.3694472728570077, "compression/movement_sparsity/model_sparsity": 0.3567556224816199, "compression_loss": 83.23524475097656, "distillation_loss": 1.9800899028778076, "epoch": 1.22, "learning_rate": 4.131747373505616e-05, "loss": 84.7307, "step": 1438, "task_loss": 1.8810187578201294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7754250861314707, "compression/movement_sparsity/importance_threshold": -0.002204600040856996, "compression/movement_sparsity/linear_layer_sparsity": 0.3714385134588557, "compression/movement_sparsity/model_sparsity": 0.3586784578430218, "compression_loss": 83.48660278320312, "distillation_loss": 1.5159274339675903, "epoch": 1.22, "learning_rate": 4.131143581693032e-05, "loss": 85.0444, "step": 1439, "task_loss": 1.4477652311325073 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7777624227587706, "compression/movement_sparsity/importance_threshold": -0.0021974884333124697, "compression/movement_sparsity/linear_layer_sparsity": 0.3733516984593594, "compression/movement_sparsity/model_sparsity": 0.36052591905311276, "compression_loss": 83.73737335205078, "distillation_loss": 1.3582063913345337, "epoch": 1.22, "learning_rate": 4.130539789880449e-05, "loss": 84.8698, "step": 1440, "task_loss": 0.4855771064758301 }, { "compression/movement_sparsity/importance_regularization_factor": 0.780094727446945, "compression/movement_sparsity/importance_threshold": -0.002190392136004317, "compression/movement_sparsity/linear_layer_sparsity": 0.3754805081832224, "compression/movement_sparsity/model_sparsity": 0.3625815976139763, "compression_loss": 83.98758697509766, "distillation_loss": 1.9738010168075562, "epoch": 1.22, "learning_rate": 4.129935998067867e-05, "loss": 85.7191, "step": 1441, "task_loss": 1.0342822074890137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7824220056183431, "compression/movement_sparsity/importance_threshold": -0.002183311132434435, "compression/movement_sparsity/linear_layer_sparsity": 0.37757826737456157, "compression/movement_sparsity/model_sparsity": 0.3646072923236309, "compression_loss": 84.2373275756836, "distillation_loss": 1.7988866567611694, "epoch": 1.22, "learning_rate": 4.1293322062552834e-05, "loss": 85.7142, "step": 1442, "task_loss": 1.8961724042892456 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7847442626953125, "compression/movement_sparsity/importance_threshold": -0.0021762454061047265, "compression/movement_sparsity/linear_layer_sparsity": 0.379596575836943, "compression/movement_sparsity/model_sparsity": 0.36655626568128646, "compression_loss": 84.4864273071289, "distillation_loss": 0.8074774742126465, "epoch": 1.22, "learning_rate": 4.1287284144427e-05, "loss": 85.8028, "step": 1443, "task_loss": 0.8500573039054871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7870615041002014, "compression/movement_sparsity/importance_threshold": -0.002169194940517092, "compression/movement_sparsity/linear_layer_sparsity": 0.3818171897274345, "compression/movement_sparsity/model_sparsity": 0.36870059465323224, "compression_loss": 84.73507690429688, "distillation_loss": 1.3441245555877686, "epoch": 1.22, "learning_rate": 4.1281246226301175e-05, "loss": 86.0975, "step": 1444, "task_loss": 0.8071223497390747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7893737352553581, "compression/movement_sparsity/importance_threshold": -0.0021621597191734303, "compression/movement_sparsity/linear_layer_sparsity": 0.3839934457143206, "compression/movement_sparsity/model_sparsity": 0.37080208955202226, "compression_loss": 84.98311614990234, "distillation_loss": 1.5083156824111938, "epoch": 1.22, "learning_rate": 4.127520830817534e-05, "loss": 86.5824, "step": 1445, "task_loss": 0.926063597202301 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7916809615831306, "compression/movement_sparsity/importance_threshold": -0.002155139725575644, "compression/movement_sparsity/linear_layer_sparsity": 0.3859728933143768, "compression/movement_sparsity/model_sparsity": 0.3727135370375234, "compression_loss": 85.23066711425781, "distillation_loss": 1.4518623352050781, "epoch": 1.22, "learning_rate": 4.126917039004951e-05, "loss": 86.7656, "step": 1446, "task_loss": 1.374513864517212 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7939831885058674, "compression/movement_sparsity/importance_threshold": -0.0021481349432256325, "compression/movement_sparsity/linear_layer_sparsity": 0.38789087183027005, "compression/movement_sparsity/model_sparsity": 0.3745656270910037, "compression_loss": 85.47763061523438, "distillation_loss": 1.1333496570587158, "epoch": 1.22, "learning_rate": 4.1263132471923684e-05, "loss": 86.8897, "step": 1447, "task_loss": 1.2312239408493042 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7962804214459166, "compression/movement_sparsity/importance_threshold": -0.002141145355625296, "compression/movement_sparsity/linear_layer_sparsity": 0.3901611975756355, "compression/movement_sparsity/model_sparsity": 0.3767579601626769, "compression_loss": 85.72413635253906, "distillation_loss": 1.4136972427368164, "epoch": 1.22, "learning_rate": 4.125709455379785e-05, "loss": 86.9395, "step": 1448, "task_loss": 0.8712185621261597 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7985726658256258, "compression/movement_sparsity/importance_threshold": -0.002134170946276536, "compression/movement_sparsity/linear_layer_sparsity": 0.39218958195966924, "compression/movement_sparsity/model_sparsity": 0.37871666330307885, "compression_loss": 85.97003936767578, "distillation_loss": 1.6083614826202393, "epoch": 1.22, "learning_rate": 4.125105663567202e-05, "loss": 87.662, "step": 1449, "task_loss": 0.5006231665611267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8008599270673442, "compression/movement_sparsity/importance_threshold": -0.002127211698681252, "compression/movement_sparsity/linear_layer_sparsity": 0.39427657362763324, "compression/movement_sparsity/model_sparsity": 0.380731960386911, "compression_loss": 86.21541595458984, "distillation_loss": 1.863974690437317, "epoch": 1.23, "learning_rate": 4.124501871754619e-05, "loss": 87.7385, "step": 1450, "task_loss": 1.1951030492782593 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8031422105934196, "compression/movement_sparsity/importance_threshold": -0.0021202675963413443, "compression/movement_sparsity/linear_layer_sparsity": 0.3962438943492037, "compression/movement_sparsity/model_sparsity": 0.3826316975895091, "compression_loss": 86.46029663085938, "distillation_loss": 1.480433464050293, "epoch": 1.23, "learning_rate": 4.123898079942036e-05, "loss": 87.8981, "step": 1451, "task_loss": 1.4680736064910889 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8054195218261998, "compression/movement_sparsity/importance_threshold": -0.002113338622758714, "compression/movement_sparsity/linear_layer_sparsity": 0.3982860749951922, "compression/movement_sparsity/model_sparsity": 0.3846037230478253, "compression_loss": 86.70458984375, "distillation_loss": 1.581674575805664, "epoch": 1.23, "learning_rate": 4.123294288129453e-05, "loss": 88.3844, "step": 1452, "task_loss": 2.433668851852417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8076918661880335, "compression/movement_sparsity/importance_threshold": -0.002106424761435261, "compression/movement_sparsity/linear_layer_sparsity": 0.4002846728084715, "compression/movement_sparsity/model_sparsity": 0.3865336628778125, "compression_loss": 86.94840240478516, "distillation_loss": 1.4813472032546997, "epoch": 1.23, "learning_rate": 4.12269049631687e-05, "loss": 88.6527, "step": 1453, "task_loss": 0.9392086267471313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8099592491012688, "compression/movement_sparsity/importance_threshold": -0.0020995259958728864, "compression/movement_sparsity/linear_layer_sparsity": 0.40240111717049615, "compression/movement_sparsity/model_sparsity": 0.38857740086505715, "compression_loss": 87.19169616699219, "distillation_loss": 1.459622859954834, "epoch": 1.23, "learning_rate": 4.1220867045042874e-05, "loss": 88.6092, "step": 1454, "task_loss": 1.4059369564056396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8122216759882535, "compression/movement_sparsity/importance_threshold": -0.002092642309573491, "compression/movement_sparsity/linear_layer_sparsity": 0.40416343760207085, "compression/movement_sparsity/model_sparsity": 0.3902791801682755, "compression_loss": 87.43447875976562, "distillation_loss": 2.5178380012512207, "epoch": 1.23, "learning_rate": 4.121482912691704e-05, "loss": 89.6026, "step": 1455, "task_loss": 1.5118138790130615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8144791522713364, "compression/movement_sparsity/importance_threshold": -0.002085773686038974, "compression/movement_sparsity/linear_layer_sparsity": 0.4061536765738375, "compression/movement_sparsity/model_sparsity": 0.3922010483086707, "compression_loss": 87.67665100097656, "distillation_loss": 1.2558326721191406, "epoch": 1.23, "learning_rate": 4.120879120879121e-05, "loss": 89.4982, "step": 1456, "task_loss": 0.8126417398452759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8167316833728653, "compression/movement_sparsity/importance_threshold": -0.002078920108771236, "compression/movement_sparsity/linear_layer_sparsity": 0.4083373136204902, "compression/movement_sparsity/model_sparsity": 0.3943096707051176, "compression_loss": 87.91842651367188, "distillation_loss": 1.2887520790100098, "epoch": 1.23, "learning_rate": 4.120275329066538e-05, "loss": 89.504, "step": 1457, "task_loss": 0.39491498470306396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8189792747151883, "compression/movement_sparsity/importance_threshold": -0.0020720815612721793, "compression/movement_sparsity/linear_layer_sparsity": 0.4103506616291351, "compression/movement_sparsity/model_sparsity": 0.39625385401588265, "compression_loss": 88.15950012207031, "distillation_loss": 2.394442081451416, "epoch": 1.23, "learning_rate": 4.119671537253955e-05, "loss": 89.8026, "step": 1458, "task_loss": 1.57588529586792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.821221931720654, "compression/movement_sparsity/importance_threshold": -0.0020652580270437014, "compression/movement_sparsity/linear_layer_sparsity": 0.4122462584823759, "compression/movement_sparsity/model_sparsity": 0.39808433128567655, "compression_loss": 88.40016174316406, "distillation_loss": 2.413412570953369, "epoch": 1.23, "learning_rate": 4.119067745441372e-05, "loss": 90.5116, "step": 1459, "task_loss": 1.7402560710906982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8234596598116104, "compression/movement_sparsity/importance_threshold": -0.0020584494895877054, "compression/movement_sparsity/linear_layer_sparsity": 0.41412053492388373, "compression/movement_sparsity/model_sparsity": 0.39989422056546986, "compression_loss": 88.6402816772461, "distillation_loss": 1.2922289371490479, "epoch": 1.23, "learning_rate": 4.118463953628789e-05, "loss": 90.4996, "step": 1460, "task_loss": 1.291831135749817 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8256924644104057, "compression/movement_sparsity/importance_threshold": -0.00205165593240609, "compression/movement_sparsity/linear_layer_sparsity": 0.41614310031411117, "compression/movement_sparsity/model_sparsity": 0.4018473046124041, "compression_loss": 88.87980651855469, "distillation_loss": 1.0690573453903198, "epoch": 1.23, "learning_rate": 4.117860161816206e-05, "loss": 90.2168, "step": 1461, "task_loss": 1.7019424438476562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8279203509393878, "compression/movement_sparsity/importance_threshold": -0.0020448773390007572, "compression/movement_sparsity/linear_layer_sparsity": 0.4183557607847896, "compression/movement_sparsity/model_sparsity": 0.40398395338897486, "compression_loss": 89.1187973022461, "distillation_loss": 3.436924457550049, "epoch": 1.24, "learning_rate": 4.117256370003623e-05, "loss": 91.0768, "step": 1462, "task_loss": 2.285360097885132 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8301433248209054, "compression/movement_sparsity/importance_threshold": -0.0020381136928736064, "compression/movement_sparsity/linear_layer_sparsity": 0.4202366789876706, "compression/movement_sparsity/model_sparsity": 0.4058002562652057, "compression_loss": 89.35726928710938, "distillation_loss": 2.199679136276245, "epoch": 1.24, "learning_rate": 4.11665257819104e-05, "loss": 91.1663, "step": 1463, "task_loss": 1.2185769081115723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8323613914773064, "compression/movement_sparsity/importance_threshold": -0.002031364977526538, "compression/movement_sparsity/linear_layer_sparsity": 0.4223727505296238, "compression/movement_sparsity/model_sparsity": 0.4078629471783682, "compression_loss": 89.59520721435547, "distillation_loss": 1.9171276092529297, "epoch": 1.24, "learning_rate": 4.1160487863784567e-05, "loss": 91.434, "step": 1464, "task_loss": 1.48882257938385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8345745563309392, "compression/movement_sparsity/importance_threshold": -0.0020246311764614525, "compression/movement_sparsity/linear_layer_sparsity": 0.4245852798344582, "compression/movement_sparsity/model_sparsity": 0.4099994692950452, "compression_loss": 89.83258819580078, "distillation_loss": 1.6365708112716675, "epoch": 1.24, "learning_rate": 4.115444994565874e-05, "loss": 92.1061, "step": 1465, "task_loss": 1.5315965414047241 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8367828248041516, "compression/movement_sparsity/importance_threshold": -0.0020179122731802516, "compression/movement_sparsity/linear_layer_sparsity": 0.42640592136993993, "compression/movement_sparsity/model_sparsity": 0.4117575661928347, "compression_loss": 90.0694351196289, "distillation_loss": 2.515531301498413, "epoch": 1.24, "learning_rate": 4.114841202753291e-05, "loss": 91.7685, "step": 1466, "task_loss": 2.0843374729156494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8389862023192922, "compression/movement_sparsity/importance_threshold": -0.002011208251184834, "compression/movement_sparsity/linear_layer_sparsity": 0.42824732288127576, "compression/movement_sparsity/model_sparsity": 0.4135357098974424, "compression_loss": 90.30577850341797, "distillation_loss": 1.041003704071045, "epoch": 1.24, "learning_rate": 4.1142374109407075e-05, "loss": 91.8211, "step": 1467, "task_loss": 0.6549727320671082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8411846942987089, "compression/movement_sparsity/importance_threshold": -0.0020045190939771015, "compression/movement_sparsity/linear_layer_sparsity": 0.4302388138906441, "compression/movement_sparsity/model_sparsity": 0.4154587870640961, "compression_loss": 90.5416259765625, "distillation_loss": 0.758406400680542, "epoch": 1.24, "learning_rate": 4.113633619128125e-05, "loss": 92.1033, "step": 1468, "task_loss": 0.7709652781486511 }, { "compression/movement_sparsity/importance_regularization_factor": 0.84337830616475, "compression/movement_sparsity/importance_threshold": -0.001997844785058954, "compression/movement_sparsity/linear_layer_sparsity": 0.43213488771059033, "compression/movement_sparsity/model_sparsity": 0.4172897249153218, "compression_loss": 90.7768783569336, "distillation_loss": 2.275209903717041, "epoch": 1.24, "learning_rate": 4.1130298273155416e-05, "loss": 92.3913, "step": 1469, "task_loss": 1.299243688583374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.845567043339764, "compression/movement_sparsity/importance_threshold": -0.001991185307932291, "compression/movement_sparsity/linear_layer_sparsity": 0.4342314067884954, "compression/movement_sparsity/model_sparsity": 0.41931422211325375, "compression_loss": 91.01165008544922, "distillation_loss": 1.088546872138977, "epoch": 1.24, "learning_rate": 4.112426035502959e-05, "loss": 92.3818, "step": 1470, "task_loss": 0.5441562533378601 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8477509112460988, "compression/movement_sparsity/importance_threshold": -0.0019845406460990145, "compression/movement_sparsity/linear_layer_sparsity": 0.43640322698502093, "compression/movement_sparsity/model_sparsity": 0.42141143360472827, "compression_loss": 91.24581146240234, "distillation_loss": 2.9870645999908447, "epoch": 1.24, "learning_rate": 4.111822243690376e-05, "loss": 93.25, "step": 1471, "task_loss": 2.1369879245758057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8499299153061023, "compression/movement_sparsity/importance_threshold": -0.001977910783061025, "compression/movement_sparsity/linear_layer_sparsity": 0.4386132879871547, "compression/movement_sparsity/model_sparsity": 0.42354557221249584, "compression_loss": 91.47949981689453, "distillation_loss": 2.1912131309509277, "epoch": 1.24, "learning_rate": 4.111218451877793e-05, "loss": 93.7806, "step": 1472, "task_loss": 1.258070945739746 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8521040609421231, "compression/movement_sparsity/importance_threshold": -0.0019712957023202225, "compression/movement_sparsity/linear_layer_sparsity": 0.4405848417882359, "compression/movement_sparsity/model_sparsity": 0.425449397075301, "compression_loss": 91.71265411376953, "distillation_loss": 2.145315647125244, "epoch": 1.24, "learning_rate": 4.11061466006521e-05, "loss": 93.3714, "step": 1473, "task_loss": 1.054006814956665 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8542733535765097, "compression/movement_sparsity/importance_threshold": -0.001964695387378506, "compression/movement_sparsity/linear_layer_sparsity": 0.4426664198840933, "compression/movement_sparsity/model_sparsity": 0.4274594665598824, "compression_loss": 91.9453125, "distillation_loss": 2.528749942779541, "epoch": 1.25, "learning_rate": 4.1100108682526265e-05, "loss": 93.7783, "step": 1474, "task_loss": 1.4758806228637695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8564377986316093, "compression/movement_sparsity/importance_threshold": -0.001958109821737779, "compression/movement_sparsity/linear_layer_sparsity": 0.44477889349829514, "compression/movement_sparsity/model_sparsity": 0.4294993702067075, "compression_loss": 92.17738342285156, "distillation_loss": 1.3167691230773926, "epoch": 1.25, "learning_rate": 4.109407076440044e-05, "loss": 93.6363, "step": 1475, "task_loss": 0.7906906008720398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.858597401529771, "compression/movement_sparsity/importance_threshold": -0.001951538988899939, "compression/movement_sparsity/linear_layer_sparsity": 0.4467375811224973, "compression/movement_sparsity/model_sparsity": 0.4313907708853903, "compression_loss": 92.40898132324219, "distillation_loss": 2.044130325317383, "epoch": 1.25, "learning_rate": 4.108803284627461e-05, "loss": 94.9576, "step": 1476, "task_loss": 2.0637025833129883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.860752167693343, "compression/movement_sparsity/importance_threshold": -0.0019449828723668875, "compression/movement_sparsity/linear_layer_sparsity": 0.4489042739269394, "compression/movement_sparsity/model_sparsity": 0.43348303112647313, "compression_loss": 92.64002227783203, "distillation_loss": 1.6544620990753174, "epoch": 1.25, "learning_rate": 4.1081994928148774e-05, "loss": 94.7044, "step": 1477, "task_loss": 1.0498977899551392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8629021025446726, "compression/movement_sparsity/importance_threshold": -0.0019384414556405266, "compression/movement_sparsity/linear_layer_sparsity": 0.4508674808106755, "compression/movement_sparsity/model_sparsity": 0.4353787958142221, "compression_loss": 92.87060546875, "distillation_loss": 1.251671314239502, "epoch": 1.25, "learning_rate": 4.107595701002295e-05, "loss": 94.4082, "step": 1478, "task_loss": 0.5553045868873596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.865047211506109, "compression/movement_sparsity/importance_threshold": -0.0019319147222227537, "compression/movement_sparsity/linear_layer_sparsity": 0.45276639258251905, "compression/movement_sparsity/model_sparsity": 0.43721247412496694, "compression_loss": 93.10057067871094, "distillation_loss": 1.6820611953735352, "epoch": 1.25, "learning_rate": 4.1069919091897115e-05, "loss": 94.7123, "step": 1479, "task_loss": 1.2488874197006226 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8671875, "compression/movement_sparsity/importance_threshold": -0.0019254026556154713, "compression/movement_sparsity/linear_layer_sparsity": 0.45471199939482465, "compression/movement_sparsity/model_sparsity": 0.4390912433578832, "compression_loss": 93.330078125, "distillation_loss": 1.3987200260162354, "epoch": 1.25, "learning_rate": 4.106388117377128e-05, "loss": 95.0843, "step": 1480, "task_loss": 1.1722813844680786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8693229734486936, "compression/movement_sparsity/importance_threshold": -0.0019189052393205801, "compression/movement_sparsity/linear_layer_sparsity": 0.4566409242966077, "compression/movement_sparsity/model_sparsity": 0.4409539037552229, "compression_loss": 93.55905151367188, "distillation_loss": 2.373427391052246, "epoch": 1.25, "learning_rate": 4.1057843255645456e-05, "loss": 95.5699, "step": 1481, "task_loss": 2.5787312984466553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8714536372745385, "compression/movement_sparsity/importance_threshold": -0.0019124224568399786, "compression/movement_sparsity/linear_layer_sparsity": 0.4584991031118071, "compression/movement_sparsity/model_sparsity": 0.4427482484116936, "compression_loss": 93.78746795654297, "distillation_loss": 2.107168674468994, "epoch": 1.25, "learning_rate": 4.105180533751963e-05, "loss": 95.8652, "step": 1482, "task_loss": 1.9090006351470947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.873579496899882, "compression/movement_sparsity/importance_threshold": -0.00190595429167557, "compression/movement_sparsity/linear_layer_sparsity": 0.46043322695067934, "compression/movement_sparsity/model_sparsity": 0.44461592914663967, "compression_loss": 94.0154800415039, "distillation_loss": 1.4259358644485474, "epoch": 1.25, "learning_rate": 4.104576741939379e-05, "loss": 96.005, "step": 1483, "task_loss": 2.0791573524475098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8757005577470733, "compression/movement_sparsity/importance_threshold": -0.0018995007273292525, "compression/movement_sparsity/linear_layer_sparsity": 0.46233469049439696, "compression/movement_sparsity/model_sparsity": 0.4464520715680445, "compression_loss": 94.24298095703125, "distillation_loss": 2.386120319366455, "epoch": 1.25, "learning_rate": 4.1039729501267964e-05, "loss": 96.691, "step": 1484, "task_loss": 1.3493601083755493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.87781682523846, "compression/movement_sparsity/importance_threshold": -0.0018930617473029276, "compression/movement_sparsity/linear_layer_sparsity": 0.4642194959759273, "compression/movement_sparsity/model_sparsity": 0.44827212818294443, "compression_loss": 94.46988677978516, "distillation_loss": 2.0142054557800293, "epoch": 1.26, "learning_rate": 4.103369158314214e-05, "loss": 97.2868, "step": 1485, "task_loss": 1.6579411029815674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8799283047963904, "compression/movement_sparsity/importance_threshold": -0.0018866373350984959, "compression/movement_sparsity/linear_layer_sparsity": 0.46618483728567023, "compression/movement_sparsity/model_sparsity": 0.4501699539726006, "compression_loss": 94.69634246826172, "distillation_loss": 1.5398972034454346, "epoch": 1.26, "learning_rate": 4.1027653665016306e-05, "loss": 96.8952, "step": 1486, "task_loss": 1.0231614112854004 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8820350018432129, "compression/movement_sparsity/importance_threshold": -0.0018802274742178572, "compression/movement_sparsity/linear_layer_sparsity": 0.46836697188720083, "compression/movement_sparsity/model_sparsity": 0.4522771255375374, "compression_loss": 94.92230224609375, "distillation_loss": 1.5074758529663086, "epoch": 1.26, "learning_rate": 4.102161574689047e-05, "loss": 96.5317, "step": 1487, "task_loss": 0.5253912210464478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8841369218012756, "compression/movement_sparsity/importance_threshold": -0.0018738321481629118, "compression/movement_sparsity/linear_layer_sparsity": 0.47018136515884135, "compression/movement_sparsity/model_sparsity": 0.4540291888185705, "compression_loss": 95.14779663085938, "distillation_loss": 4.042708873748779, "epoch": 1.26, "learning_rate": 4.101557782876465e-05, "loss": 97.6667, "step": 1488, "task_loss": 2.2897398471832275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8862340700929262, "compression/movement_sparsity/importance_threshold": -0.0018674513404355621, "compression/movement_sparsity/linear_layer_sparsity": 0.47206333268847434, "compression/movement_sparsity/model_sparsity": 0.4558465049739513, "compression_loss": 95.37268829345703, "distillation_loss": 2.2302136421203613, "epoch": 1.26, "learning_rate": 4.1009539910638814e-05, "loss": 97.3186, "step": 1489, "task_loss": 1.1984933614730835 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8883264521405138, "compression/movement_sparsity/importance_threshold": -0.0018610850345377058, "compression/movement_sparsity/linear_layer_sparsity": 0.473868258171012, "compression/movement_sparsity/model_sparsity": 0.45758942571356354, "compression_loss": 95.59718322753906, "distillation_loss": 2.9210283756256104, "epoch": 1.26, "learning_rate": 4.100350199251298e-05, "loss": 97.5623, "step": 1490, "task_loss": 1.9384020566940308 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8904140733663858, "compression/movement_sparsity/importance_threshold": -0.0018547332139712451, "compression/movement_sparsity/linear_layer_sparsity": 0.47591146429541714, "compression/movement_sparsity/model_sparsity": 0.4595624414219581, "compression_loss": 95.82103729248047, "distillation_loss": 2.1385228633880615, "epoch": 1.26, "learning_rate": 4.0997464074387155e-05, "loss": 98.023, "step": 1491, "task_loss": 1.2332741022109985 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8924969391928907, "compression/movement_sparsity/importance_threshold": -0.0018483958622380805, "compression/movement_sparsity/linear_layer_sparsity": 0.4774953037858183, "compression/movement_sparsity/model_sparsity": 0.4610918711534038, "compression_loss": 96.04447937011719, "distillation_loss": 1.8286014795303345, "epoch": 1.26, "learning_rate": 4.099142615626132e-05, "loss": 98.1788, "step": 1492, "task_loss": 1.8470790386199951 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8945750550423769, "compression/movement_sparsity/importance_threshold": -0.0018420729628401116, "compression/movement_sparsity/linear_layer_sparsity": 0.4793622230158948, "compression/movement_sparsity/model_sparsity": 0.4628946559646119, "compression_loss": 96.26741027832031, "distillation_loss": 2.382716655731201, "epoch": 1.26, "learning_rate": 4.098538823813549e-05, "loss": 98.6504, "step": 1493, "task_loss": 3.0367345809936523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8966484263371924, "compression/movement_sparsity/importance_threshold": -0.0018357644992792388, "compression/movement_sparsity/linear_layer_sparsity": 0.4812443336355394, "compression/movement_sparsity/model_sparsity": 0.46471211029442216, "compression_loss": 96.4897232055664, "distillation_loss": 1.8656718730926514, "epoch": 1.26, "learning_rate": 4.097935032000966e-05, "loss": 98.6476, "step": 1494, "task_loss": 1.6977344751358032 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8987170584996854, "compression/movement_sparsity/importance_threshold": -0.0018294704550573635, "compression/movement_sparsity/linear_layer_sparsity": 0.48303757343379394, "compression/movement_sparsity/model_sparsity": 0.4664437467889558, "compression_loss": 96.71153259277344, "distillation_loss": 1.585442304611206, "epoch": 1.26, "learning_rate": 4.097331240188384e-05, "loss": 98.652, "step": 1495, "task_loss": 1.535503625869751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9007809569522041, "compression/movement_sparsity/importance_threshold": -0.0018231908136763855, "compression/movement_sparsity/linear_layer_sparsity": 0.48498663825471394, "compression/movement_sparsity/model_sparsity": 0.46832585523725245, "compression_loss": 96.93289947509766, "distillation_loss": 2.3262600898742676, "epoch": 1.26, "learning_rate": 4.0967274483758e-05, "loss": 98.9485, "step": 1496, "task_loss": 1.2926844358444214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9028401271170969, "compression/movement_sparsity/importance_threshold": -0.0018169255586382048, "compression/movement_sparsity/linear_layer_sparsity": 0.48660384156616027, "compression/movement_sparsity/model_sparsity": 0.46988750263985124, "compression_loss": 97.1536636352539, "distillation_loss": 2.212984323501587, "epoch": 1.27, "learning_rate": 4.096123656563217e-05, "loss": 99.3644, "step": 1497, "task_loss": 1.1809406280517578 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9048945744167113, "compression/movement_sparsity/importance_threshold": -0.001810674673444724, "compression/movement_sparsity/linear_layer_sparsity": 0.48842699910101317, "compression/movement_sparsity/model_sparsity": 0.4716480291046934, "compression_loss": 97.37403869628906, "distillation_loss": 3.554448127746582, "epoch": 1.27, "learning_rate": 4.0955198647506346e-05, "loss": 100.0839, "step": 1498, "task_loss": 2.1796820163726807 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9069443042733967, "compression/movement_sparsity/importance_threshold": -0.0018044381415978396, "compression/movement_sparsity/linear_layer_sparsity": 0.49017284033291514, "compression/movement_sparsity/model_sparsity": 0.4733338953194437, "compression_loss": 97.59376525878906, "distillation_loss": 3.316885232925415, "epoch": 1.27, "learning_rate": 4.0949160729380506e-05, "loss": 99.9674, "step": 1499, "task_loss": 1.997530221939087 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9089893221095, "compression/movement_sparsity/importance_threshold": -0.0017982159465994565, "compression/movement_sparsity/linear_layer_sparsity": 0.49200291388499684, "compression/movement_sparsity/model_sparsity": 0.47510110021504665, "compression_loss": 97.81307983398438, "distillation_loss": 2.1376092433929443, "epoch": 1.27, "learning_rate": 4.094312281125468e-05, "loss": 99.9552, "step": 1500, "task_loss": 1.950959324836731 }, { "epoch": 1.27, "eval_accuracy": 0.7577029702970297, "eval_loss": 99.48823547363281, "eval_runtime": 309.4335, "eval_samples_per_second": 81.601, "eval_steps_per_second": 0.64, "step": 1500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9110296333473702, "compression/movement_sparsity/importance_threshold": -0.0017920080719514724, "compression/movement_sparsity/linear_layer_sparsity": 0.4938485723241786, "compression/movement_sparsity/model_sparsity": 0.47688335460893305, "compression_loss": 98.0318374633789, "distillation_loss": 1.574967861175537, "epoch": 1.27, "learning_rate": 4.0937084893128854e-05, "loss": 100.5241, "step": 1501, "task_loss": 1.9993964433670044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9130652434093551, "compression/movement_sparsity/importance_threshold": -0.0017858145011557893, "compression/movement_sparsity/linear_layer_sparsity": 0.4958555289789707, "compression/movement_sparsity/model_sparsity": 0.47882136612851217, "compression_loss": 98.25009155273438, "distillation_loss": 3.003077983856201, "epoch": 1.27, "learning_rate": 4.093104697500302e-05, "loss": 100.6841, "step": 1502, "task_loss": 1.8956490755081177 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9150961577178031, "compression/movement_sparsity/importance_threshold": -0.001779635217714307, "compression/movement_sparsity/linear_layer_sparsity": 0.49774046562634505, "compression/movement_sparsity/model_sparsity": 0.48064154940330583, "compression_loss": 98.46780395507812, "distillation_loss": 2.5956430435180664, "epoch": 1.27, "learning_rate": 4.092500905687719e-05, "loss": 100.6119, "step": 1503, "task_loss": 1.0168830156326294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9171223816950625, "compression/movement_sparsity/importance_threshold": -0.0017734702051289248, "compression/movement_sparsity/linear_layer_sparsity": 0.499530056629303, "compression/movement_sparsity/model_sparsity": 0.48236966244988627, "compression_loss": 98.6850814819336, "distillation_loss": 1.9505882263183594, "epoch": 1.27, "learning_rate": 4.091897113875136e-05, "loss": 101.0449, "step": 1504, "task_loss": 0.9648960828781128 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9191439207634815, "compression/movement_sparsity/importance_threshold": -0.0017673194469015443, "compression/movement_sparsity/linear_layer_sparsity": 0.5012300732849804, "compression/movement_sparsity/model_sparsity": 0.4840112783035782, "compression_loss": 98.90174865722656, "distillation_loss": 3.3877549171447754, "epoch": 1.27, "learning_rate": 4.091293322062553e-05, "loss": 101.4548, "step": 1505, "task_loss": 1.6129616498947144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9211607803454076, "compression/movement_sparsity/importance_threshold": -0.001761182926534067, "compression/movement_sparsity/linear_layer_sparsity": 0.5031061383516336, "compression/movement_sparsity/model_sparsity": 0.4858228947637407, "compression_loss": 99.11808013916016, "distillation_loss": 1.7419830560684204, "epoch": 1.27, "learning_rate": 4.09068953024997e-05, "loss": 100.9467, "step": 1506, "task_loss": 1.4925693273544312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9231729658631901, "compression/movement_sparsity/importance_threshold": -0.001755060627528391, "compression/movement_sparsity/linear_layer_sparsity": 0.5047107616662241, "compression/movement_sparsity/model_sparsity": 0.4873723943310762, "compression_loss": 99.333740234375, "distillation_loss": 3.8648133277893066, "epoch": 1.27, "learning_rate": 4.090085738437387e-05, "loss": 102.3531, "step": 1507, "task_loss": 1.719414472579956 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9251804827391765, "compression/movement_sparsity/importance_threshold": -0.0017489525333864182, "compression/movement_sparsity/linear_layer_sparsity": 0.5064230125178958, "compression/movement_sparsity/model_sparsity": 0.4890258240984933, "compression_loss": 99.54905700683594, "distillation_loss": 4.190714359283447, "epoch": 1.27, "learning_rate": 4.089481946624804e-05, "loss": 102.3868, "step": 1508, "task_loss": 3.118457317352295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9271833363957147, "compression/movement_sparsity/importance_threshold": -0.0017428586276100502, "compression/movement_sparsity/linear_layer_sparsity": 0.5081770933496341, "compression/movement_sparsity/model_sparsity": 0.49071964685747765, "compression_loss": 99.76375579833984, "distillation_loss": 3.2744250297546387, "epoch": 1.28, "learning_rate": 4.0888781548122205e-05, "loss": 102.5605, "step": 1509, "task_loss": 1.922554850578308 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9291815322551539, "compression/movement_sparsity/importance_threshold": -0.0017367788937011847, "compression/movement_sparsity/linear_layer_sparsity": 0.5101608455742068, "compression/movement_sparsity/model_sparsity": 0.4926352510904006, "compression_loss": 99.97798919677734, "distillation_loss": 1.5416498184204102, "epoch": 1.28, "learning_rate": 4.088274362999638e-05, "loss": 102.285, "step": 1510, "task_loss": 2.3689537048339844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9311750757398416, "compression/movement_sparsity/importance_threshold": -0.0017307133151617236, "compression/movement_sparsity/linear_layer_sparsity": 0.5121285001724711, "compression/movement_sparsity/model_sparsity": 0.494535310700001, "compression_loss": 100.1917724609375, "distillation_loss": 2.8311927318573, "epoch": 1.28, "learning_rate": 4.087670571187055e-05, "loss": 102.6318, "step": 1511, "task_loss": 2.023484706878662 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9331639722721259, "compression/movement_sparsity/importance_threshold": -0.0017246618754935684, "compression/movement_sparsity/linear_layer_sparsity": 0.5138681408372024, "compression/movement_sparsity/model_sparsity": 0.4962151893561381, "compression_loss": 100.40502166748047, "distillation_loss": 1.8619507551193237, "epoch": 1.28, "learning_rate": 4.087066779374472e-05, "loss": 102.2339, "step": 1512, "task_loss": 1.015195608139038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9351482272743554, "compression/movement_sparsity/importance_threshold": -0.0017186245581986173, "compression/movement_sparsity/linear_layer_sparsity": 0.5157130838263261, "compression/movement_sparsity/model_sparsity": 0.49799675287787687, "compression_loss": 100.61775207519531, "distillation_loss": 2.4256768226623535, "epoch": 1.28, "learning_rate": 4.086462987561889e-05, "loss": 102.7127, "step": 1513, "task_loss": 1.0213738679885864 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9371278461688781, "compression/movement_sparsity/importance_threshold": -0.0017126013467787726, "compression/movement_sparsity/linear_layer_sparsity": 0.5174307124776016, "compression/movement_sparsity/model_sparsity": 0.49965537570093727, "compression_loss": 100.83002471923828, "distillation_loss": 3.780240535736084, "epoch": 1.28, "learning_rate": 4.085859195749306e-05, "loss": 104.5235, "step": 1514, "task_loss": 1.6220946311950684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9391028343780421, "compression/movement_sparsity/importance_threshold": -0.0017065922247359346, "compression/movement_sparsity/linear_layer_sparsity": 0.5190585640707468, "compression/movement_sparsity/model_sparsity": 0.5012273055840006, "compression_loss": 101.04183197021484, "distillation_loss": 2.694791316986084, "epoch": 1.28, "learning_rate": 4.085255403936723e-05, "loss": 103.2456, "step": 1515, "task_loss": 1.0968552827835083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9410731973241961, "compression/movement_sparsity/importance_threshold": -0.0017005971755720017, "compression/movement_sparsity/linear_layer_sparsity": 0.5210256343847969, "compression/movement_sparsity/model_sparsity": 0.503126800981347, "compression_loss": 101.25298309326172, "distillation_loss": 3.001863956451416, "epoch": 1.28, "learning_rate": 4.0846516121241396e-05, "loss": 104.2717, "step": 1516, "task_loss": 2.290720224380493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9430389404296875, "compression/movement_sparsity/importance_threshold": -0.0016946161827888773, "compression/movement_sparsity/linear_layer_sparsity": 0.5226865755431316, "compression/movement_sparsity/model_sparsity": 0.5047306837012402, "compression_loss": 101.46376037597656, "distillation_loss": 2.0780413150787354, "epoch": 1.28, "learning_rate": 4.084047820311557e-05, "loss": 103.4117, "step": 1517, "task_loss": 0.8307811617851257 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9450000691168647, "compression/movement_sparsity/importance_threshold": -0.001688649229888461, "compression/movement_sparsity/linear_layer_sparsity": 0.5244397262897943, "compression/movement_sparsity/model_sparsity": 0.5064236083264326, "compression_loss": 101.67395782470703, "distillation_loss": 3.2058920860290527, "epoch": 1.28, "learning_rate": 4.083444028498974e-05, "loss": 104.2834, "step": 1518, "task_loss": 1.202286720275879 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9469565888080768, "compression/movement_sparsity/importance_threshold": -0.0016826963003726508, "compression/movement_sparsity/linear_layer_sparsity": 0.526173297553199, "compression/movement_sparsity/model_sparsity": 0.5080976260838503, "compression_loss": 101.88365936279297, "distillation_loss": 2.6888389587402344, "epoch": 1.28, "learning_rate": 4.0828402366863904e-05, "loss": 104.6186, "step": 1519, "task_loss": 0.8831520676612854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9489085049256708, "compression/movement_sparsity/importance_threshold": -0.0016767573777433501, "compression/movement_sparsity/linear_layer_sparsity": 0.5278062765384275, "compression/movement_sparsity/model_sparsity": 0.5096745072173052, "compression_loss": 102.0929183959961, "distillation_loss": 2.503772258758545, "epoch": 1.28, "learning_rate": 4.082236444873808e-05, "loss": 104.7031, "step": 1520, "task_loss": 2.653703451156616 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9508558228919954, "compression/movement_sparsity/importance_threshold": -0.0016708324455024588, "compression/movement_sparsity/linear_layer_sparsity": 0.5297526584216294, "compression/movement_sparsity/model_sparsity": 0.5115540248950482, "compression_loss": 102.3016128540039, "distillation_loss": 3.3353288173675537, "epoch": 1.29, "learning_rate": 4.0816326530612245e-05, "loss": 105.0724, "step": 1521, "task_loss": 2.957670211791992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9527985481293989, "compression/movement_sparsity/importance_threshold": -0.0016649214871518763, "compression/movement_sparsity/linear_layer_sparsity": 0.5316790911723766, "compression/movement_sparsity/model_sparsity": 0.5134142787544068, "compression_loss": 102.50987243652344, "distillation_loss": 3.0619993209838867, "epoch": 1.29, "learning_rate": 4.081028861248642e-05, "loss": 105.7065, "step": 1522, "task_loss": 2.0704619884490967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9547366860602291, "compression/movement_sparsity/importance_threshold": -0.001659024486193505, "compression/movement_sparsity/linear_layer_sparsity": 0.5334817510630634, "compression/movement_sparsity/model_sparsity": 0.5151550117322181, "compression_loss": 102.7176742553711, "distillation_loss": 3.695098876953125, "epoch": 1.29, "learning_rate": 4.0804250694360586e-05, "loss": 105.8055, "step": 1523, "task_loss": 2.4045908451080322 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9566702421068347, "compression/movement_sparsity/importance_threshold": -0.001653141426129243, "compression/movement_sparsity/linear_layer_sparsity": 0.5353800785506928, "compression/movement_sparsity/model_sparsity": 0.516988125830709, "compression_loss": 102.92491912841797, "distillation_loss": 3.8236589431762695, "epoch": 1.29, "learning_rate": 4.079821277623475e-05, "loss": 105.8456, "step": 1524, "task_loss": 1.9850085973739624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9585992216915636, "compression/movement_sparsity/importance_threshold": -0.001647272290460992, "compression/movement_sparsity/linear_layer_sparsity": 0.5371247035174959, "compression/movement_sparsity/model_sparsity": 0.5186728175628083, "compression_loss": 103.13175964355469, "distillation_loss": 1.726851224899292, "epoch": 1.29, "learning_rate": 4.079217485810893e-05, "loss": 105.0065, "step": 1525, "task_loss": 0.6944738030433655 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9605236302367641, "compression/movement_sparsity/importance_threshold": -0.0016414170626906522, "compression/movement_sparsity/linear_layer_sparsity": 0.5389055898780797, "compression/movement_sparsity/model_sparsity": 0.5203925249982587, "compression_loss": 103.33800506591797, "distillation_loss": 2.450979471206665, "epoch": 1.29, "learning_rate": 4.0786136939983095e-05, "loss": 105.5405, "step": 1526, "task_loss": 1.497849702835083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9624434731647843, "compression/movement_sparsity/importance_threshold": -0.0016355757263201245, "compression/movement_sparsity/linear_layer_sparsity": 0.5406081225331282, "compression/movement_sparsity/model_sparsity": 0.5220365704190032, "compression_loss": 103.54375457763672, "distillation_loss": 4.790939807891846, "epoch": 1.29, "learning_rate": 4.078009902185727e-05, "loss": 106.9204, "step": 1527, "task_loss": 2.5016121864318848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9643587558979725, "compression/movement_sparsity/importance_threshold": -0.0016297482648513083, "compression/movement_sparsity/linear_layer_sparsity": 0.5422847201235688, "compression/movement_sparsity/model_sparsity": 0.5236555717243947, "compression_loss": 103.74905395507812, "distillation_loss": 1.872643232345581, "epoch": 1.29, "learning_rate": 4.0774061103731436e-05, "loss": 106.8003, "step": 1528, "task_loss": 0.9709722399711609 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9662694838586766, "compression/movement_sparsity/importance_threshold": -0.001623934661786106, "compression/movement_sparsity/linear_layer_sparsity": 0.5441093324068732, "compression/movement_sparsity/model_sparsity": 0.5254175029626037, "compression_loss": 103.9538345336914, "distillation_loss": 2.067540168762207, "epoch": 1.29, "learning_rate": 4.07680231856056e-05, "loss": 106.312, "step": 1529, "task_loss": 2.535637617111206 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9681756624692455, "compression/movement_sparsity/importance_threshold": -0.0016181349006264157, "compression/movement_sparsity/linear_layer_sparsity": 0.5459100725065706, "compression/movement_sparsity/model_sparsity": 0.527156382100152, "compression_loss": 104.1581039428711, "distillation_loss": 3.6433424949645996, "epoch": 1.29, "learning_rate": 4.076198526747978e-05, "loss": 107.3289, "step": 1530, "task_loss": 1.8894073963165283 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9700772971520265, "compression/movement_sparsity/importance_threshold": -0.0016123489648741393, "compression/movement_sparsity/linear_layer_sparsity": 0.5476512394647595, "compression/movement_sparsity/model_sparsity": 0.5288377346168709, "compression_loss": 104.36184692382812, "distillation_loss": 3.907491683959961, "epoch": 1.29, "learning_rate": 4.0755947349353944e-05, "loss": 107.7219, "step": 1531, "task_loss": 1.3845911026000977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9719743933293683, "compression/movement_sparsity/importance_threshold": -0.0016065768380311776, "compression/movement_sparsity/linear_layer_sparsity": 0.5495527745534828, "compression/movement_sparsity/model_sparsity": 0.5306739461254906, "compression_loss": 104.56517791748047, "distillation_loss": 3.3820080757141113, "epoch": 1.29, "learning_rate": 4.074990943122812e-05, "loss": 107.3892, "step": 1532, "task_loss": 1.8725686073303223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9738669564236192, "compression/movement_sparsity/importance_threshold": -0.0016008185035994295, "compression/movement_sparsity/linear_layer_sparsity": 0.5511934446268365, "compression/movement_sparsity/model_sparsity": 0.532258254134533, "compression_loss": 104.76799011230469, "distillation_loss": 2.408450126647949, "epoch": 1.3, "learning_rate": 4.0743871513102285e-05, "loss": 107.5906, "step": 1533, "task_loss": 1.7001370191574097 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9757549918571271, "compression/movement_sparsity/importance_threshold": -0.0015950739450807968, "compression/movement_sparsity/linear_layer_sparsity": 0.5529400370812996, "compression/movement_sparsity/model_sparsity": 0.5339448457650383, "compression_loss": 104.97030639648438, "distillation_loss": 2.979132652282715, "epoch": 1.3, "learning_rate": 4.073783359497645e-05, "loss": 107.9387, "step": 1534, "task_loss": 2.4278464317321777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9776385050522403, "compression/movement_sparsity/importance_threshold": -0.00158934314597718, "compression/movement_sparsity/linear_layer_sparsity": 0.5546922100462162, "compression/movement_sparsity/model_sparsity": 0.5356368261982956, "compression_loss": 105.17210388183594, "distillation_loss": 2.3510773181915283, "epoch": 1.3, "learning_rate": 4.0731795676850626e-05, "loss": 107.4359, "step": 1535, "task_loss": 1.115848183631897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9795175014313072, "compression/movement_sparsity/importance_threshold": -0.0015836260897904779, "compression/movement_sparsity/linear_layer_sparsity": 0.5564566410554624, "compression/movement_sparsity/model_sparsity": 0.5373406435743495, "compression_loss": 105.37339782714844, "distillation_loss": 3.2075424194335938, "epoch": 1.3, "learning_rate": 4.0725757758724793e-05, "loss": 108.1529, "step": 1536, "task_loss": 2.3909096717834473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9813919864166756, "compression/movement_sparsity/importance_threshold": -0.0015779227600225933, "compression/movement_sparsity/linear_layer_sparsity": 0.5581915955223129, "compression/movement_sparsity/model_sparsity": 0.5390159970179195, "compression_loss": 105.57422637939453, "distillation_loss": 4.313943386077881, "epoch": 1.3, "learning_rate": 4.071971984059896e-05, "loss": 108.7084, "step": 1537, "task_loss": 2.286760091781616 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9832619654306939, "compression/movement_sparsity/importance_threshold": -0.0015722331401754252, "compression/movement_sparsity/linear_layer_sparsity": 0.5599624059612444, "compression/movement_sparsity/model_sparsity": 0.5407259746706236, "compression_loss": 105.77454376220703, "distillation_loss": 4.445111274719238, "epoch": 1.3, "learning_rate": 4.0713681922473135e-05, "loss": 109.1375, "step": 1538, "task_loss": 2.6167187690734863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9851274438957104, "compression/movement_sparsity/importance_threshold": -0.0015665572137508739, "compression/movement_sparsity/linear_layer_sparsity": 0.5615362291508313, "compression/movement_sparsity/model_sparsity": 0.5422457321920018, "compression_loss": 105.97439575195312, "distillation_loss": 2.8024816513061523, "epoch": 1.3, "learning_rate": 4.07076440043473e-05, "loss": 109.2061, "step": 1539, "task_loss": 1.3571248054504395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.986988427234073, "compression/movement_sparsity/importance_threshold": -0.0015608949642508412, "compression/movement_sparsity/linear_layer_sparsity": 0.5631657858999484, "compression/movement_sparsity/model_sparsity": 0.5438193086536838, "compression_loss": 106.1738052368164, "distillation_loss": 4.957732200622559, "epoch": 1.3, "learning_rate": 4.070160608622147e-05, "loss": 109.9056, "step": 1540, "task_loss": 2.3707399368286133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9888449208681303, "compression/movement_sparsity/importance_threshold": -0.0015552463751772262, "compression/movement_sparsity/linear_layer_sparsity": 0.564945599085445, "compression/movement_sparsity/model_sparsity": 0.5455379797809127, "compression_loss": 106.37268829345703, "distillation_loss": 2.8952476978302, "epoch": 1.3, "learning_rate": 4.069556816809564e-05, "loss": 109.2616, "step": 1541, "task_loss": 1.3892970085144043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9906969302202301, "compression/movement_sparsity/importance_threshold": -0.00154961143003193, "compression/movement_sparsity/linear_layer_sparsity": 0.5665855894812434, "compression/movement_sparsity/model_sparsity": 0.5471216314614149, "compression_loss": 106.57115173339844, "distillation_loss": 3.9397149085998535, "epoch": 1.3, "learning_rate": 4.068953024996982e-05, "loss": 110.1103, "step": 1542, "task_loss": 2.8137640953063965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9925444607127207, "compression/movement_sparsity/importance_threshold": -0.0015439901123168532, "compression/movement_sparsity/linear_layer_sparsity": 0.5682454455403232, "compression/movement_sparsity/model_sparsity": 0.5487244663585508, "compression_loss": 106.76905822753906, "distillation_loss": 5.813114166259766, "epoch": 1.3, "learning_rate": 4.0683492331843984e-05, "loss": 110.4854, "step": 1543, "task_loss": 2.9105277061462402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9943875177679506, "compression/movement_sparsity/importance_threshold": -0.0015383824055338956, "compression/movement_sparsity/linear_layer_sparsity": 0.5698902771481817, "compression/movement_sparsity/model_sparsity": 0.5503127929405854, "compression_loss": 106.96646881103516, "distillation_loss": 3.403697967529297, "epoch": 1.3, "learning_rate": 4.067745441371815e-05, "loss": 109.8051, "step": 1544, "task_loss": 2.2851877212524414 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9962261068082677, "compression/movement_sparsity/importance_threshold": -0.0015327882931849579, "compression/movement_sparsity/linear_layer_sparsity": 0.5714345045536965, "compression/movement_sparsity/model_sparsity": 0.5518039713841217, "compression_loss": 107.16344451904297, "distillation_loss": 3.2117934226989746, "epoch": 1.31, "learning_rate": 4.0671416495592325e-05, "loss": 110.1169, "step": 1545, "task_loss": 1.3985012769699097 }, { "compression/movement_sparsity/importance_regularization_factor": 0.99806023325602, "compression/movement_sparsity/importance_threshold": -0.0015272077587719416, "compression/movement_sparsity/linear_layer_sparsity": 0.5729264921807986, "compression/movement_sparsity/model_sparsity": 0.553244704646342, "compression_loss": 107.35993957519531, "distillation_loss": 3.835989475250244, "epoch": 1.31, "learning_rate": 4.066537857746649e-05, "loss": 110.8416, "step": 1546, "task_loss": 2.1488330364227295 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998899025335564, "compression/movement_sparsity/importance_threshold": -0.0015216407857967453, "compression/movement_sparsity/linear_layer_sparsity": 0.574570775276946, "compression/movement_sparsity/model_sparsity": 0.5548325015597302, "compression_loss": 107.55591583251953, "distillation_loss": 4.261700630187988, "epoch": 1.31, "learning_rate": 4.065934065934066e-05, "loss": 111.0396, "step": 1547, "task_loss": 2.754579544067383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0017151200632244, "compression/movement_sparsity/importance_threshold": -0.0015160873577612704, "compression/movement_sparsity/linear_layer_sparsity": 0.5763561212762255, "compression/movement_sparsity/model_sparsity": 0.5565565154315678, "compression_loss": 107.75151062011719, "distillation_loss": 4.171040058135986, "epoch": 1.31, "learning_rate": 4.0653302741214834e-05, "loss": 111.2257, "step": 1548, "task_loss": 3.101100444793701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0035358912673722, "compression/movement_sparsity/importance_threshold": -0.0015105474581674186, "compression/movement_sparsity/linear_layer_sparsity": 0.5780150949469003, "compression/movement_sparsity/model_sparsity": 0.558158498253055, "compression_loss": 107.94657135009766, "distillation_loss": 3.8038101196289062, "epoch": 1.31, "learning_rate": 4.0647264823089e-05, "loss": 111.2947, "step": 1549, "task_loss": 1.986894130706787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0053522215683484, "compression/movement_sparsity/importance_threshold": -0.0015050210705170887, "compression/movement_sparsity/linear_layer_sparsity": 0.5796629075966678, "compression/movement_sparsity/model_sparsity": 0.5597497034690383, "compression_loss": 108.14114379882812, "distillation_loss": 3.1140084266662598, "epoch": 1.31, "learning_rate": 4.064122690496317e-05, "loss": 111.0902, "step": 1550, "task_loss": 1.6885912418365479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0071641163885015, "compression/movement_sparsity/importance_threshold": -0.0014995081783121802, "compression/movement_sparsity/linear_layer_sparsity": 0.5813403875755133, "compression/movement_sparsity/model_sparsity": 0.5613695568500785, "compression_loss": 108.33525085449219, "distillation_loss": 4.062110900878906, "epoch": 1.31, "learning_rate": 4.063518898683734e-05, "loss": 112.0002, "step": 1551, "task_loss": 1.5930944681167603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0089715811501785, "compression/movement_sparsity/importance_threshold": -0.0014940087650545972, "compression/movement_sparsity/linear_layer_sparsity": 0.5830102599354072, "compression/movement_sparsity/model_sparsity": 0.5629820639572818, "compression_loss": 108.52886962890625, "distillation_loss": 3.835289239883423, "epoch": 1.31, "learning_rate": 4.0629151068711516e-05, "loss": 111.6382, "step": 1552, "task_loss": 1.416469693183899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.010774621275729, "compression/movement_sparsity/importance_threshold": -0.001488522814246236, "compression/movement_sparsity/linear_layer_sparsity": 0.5846757442016112, "compression/movement_sparsity/model_sparsity": 0.5645903337153128, "compression_loss": 108.72208404541016, "distillation_loss": 3.3042421340942383, "epoch": 1.31, "learning_rate": 4.0623113150585676e-05, "loss": 111.8082, "step": 1553, "task_loss": 2.028383255004883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0125732421875, "compression/movement_sparsity/importance_threshold": -0.0014830503093890002, "compression/movement_sparsity/linear_layer_sparsity": 0.5862880228318238, "compression/movement_sparsity/model_sparsity": 0.5661472256146284, "compression_loss": 108.9146728515625, "distillation_loss": 2.4580612182617188, "epoch": 1.31, "learning_rate": 4.061707523245985e-05, "loss": 111.8634, "step": 1554, "task_loss": 2.290478229522705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0143674493078403, "compression/movement_sparsity/importance_threshold": -0.0014775912339847893, "compression/movement_sparsity/linear_layer_sparsity": 0.5877998403497043, "compression/movement_sparsity/model_sparsity": 0.567607107549875, "compression_loss": 109.10681915283203, "distillation_loss": 2.4995217323303223, "epoch": 1.31, "learning_rate": 4.0611037314334024e-05, "loss": 111.6819, "step": 1555, "task_loss": 0.9833770990371704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0161572480590984, "compression/movement_sparsity/importance_threshold": -0.0014721455715355023, "compression/movement_sparsity/linear_layer_sparsity": 0.5895517032862625, "compression/movement_sparsity/model_sparsity": 0.5692987886052016, "compression_loss": 109.2984848022461, "distillation_loss": 4.18853235244751, "epoch": 1.32, "learning_rate": 4.0604999396208185e-05, "loss": 112.338, "step": 1556, "task_loss": 2.8779873847961426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0179426438636214, "compression/movement_sparsity/importance_threshold": -0.001466713305543042, "compression/movement_sparsity/linear_layer_sparsity": 0.5911535482697937, "compression/movement_sparsity/model_sparsity": 0.5708456052856969, "compression_loss": 109.48965454101562, "distillation_loss": 4.407125473022461, "epoch": 1.32, "learning_rate": 4.059896147808236e-05, "loss": 113.0236, "step": 1557, "task_loss": 2.585376024246216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0197236421437588, "compression/movement_sparsity/importance_threshold": -0.0014612944195093068, "compression/movement_sparsity/linear_layer_sparsity": 0.5929016550935465, "compression/movement_sparsity/model_sparsity": 0.5725336592622483, "compression_loss": 109.68032836914062, "distillation_loss": 3.2936229705810547, "epoch": 1.32, "learning_rate": 4.059292355995653e-05, "loss": 112.6751, "step": 1558, "task_loss": 1.312962532043457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0215002483218583, "compression/movement_sparsity/importance_threshold": -0.0014558888969361983, "compression/movement_sparsity/linear_layer_sparsity": 0.5946845804867961, "compression/movement_sparsity/model_sparsity": 0.5742553356833195, "compression_loss": 109.87052917480469, "distillation_loss": 2.9357213973999023, "epoch": 1.32, "learning_rate": 4.05868856418307e-05, "loss": 113.0302, "step": 1559, "task_loss": 2.1829209327697754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0232724678202672, "compression/movement_sparsity/importance_threshold": -0.001450496721325618, "compression/movement_sparsity/linear_layer_sparsity": 0.5964785834317793, "compression/movement_sparsity/model_sparsity": 0.575987709108144, "compression_loss": 110.0603256225586, "distillation_loss": 4.300307273864746, "epoch": 1.32, "learning_rate": 4.058084772370487e-05, "loss": 113.8539, "step": 1560, "task_loss": 3.6302967071533203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0250403060613351, "compression/movement_sparsity/importance_threshold": -0.0014451178761794637, "compression/movement_sparsity/linear_layer_sparsity": 0.5979925472998342, "compression/movement_sparsity/model_sparsity": 0.5774496636598337, "compression_loss": 110.24958801269531, "distillation_loss": 4.431704521179199, "epoch": 1.32, "learning_rate": 4.057480980557904e-05, "loss": 113.5981, "step": 1561, "task_loss": 2.741734504699707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0268037684674094, "compression/movement_sparsity/importance_threshold": -0.0014397523449996377, "compression/movement_sparsity/linear_layer_sparsity": 0.5994690096606744, "compression/movement_sparsity/model_sparsity": 0.5788754049964495, "compression_loss": 110.43843841552734, "distillation_loss": 2.4832940101623535, "epoch": 1.32, "learning_rate": 4.056877188745321e-05, "loss": 113.3837, "step": 1562, "task_loss": 1.6893428564071655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0285628604608383, "compression/movement_sparsity/importance_threshold": -0.0014344001112880406, "compression/movement_sparsity/linear_layer_sparsity": 0.6010364057239057, "compression/movement_sparsity/model_sparsity": 0.5803889561830345, "compression_loss": 110.62679290771484, "distillation_loss": 1.991780161857605, "epoch": 1.32, "learning_rate": 4.0562733969327375e-05, "loss": 114.1848, "step": 1563, "task_loss": 1.2107614278793335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0303175874639705, "compression/movement_sparsity/importance_threshold": -0.0014290611585465714, "compression/movement_sparsity/linear_layer_sparsity": 0.602795661644398, "compression/movement_sparsity/model_sparsity": 0.5820877762505537, "compression_loss": 110.81462860107422, "distillation_loss": 2.226391077041626, "epoch": 1.32, "learning_rate": 4.055669605120155e-05, "loss": 114.0549, "step": 1564, "task_loss": 0.8832390904426575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0320679548991538, "compression/movement_sparsity/importance_threshold": -0.001423735470277132, "compression/movement_sparsity/linear_layer_sparsity": 0.6043786902913999, "compression/movement_sparsity/model_sparsity": 0.5836164229935654, "compression_loss": 111.00200653076172, "distillation_loss": 2.9719977378845215, "epoch": 1.32, "learning_rate": 4.0550658133075716e-05, "loss": 113.6935, "step": 1565, "task_loss": 1.253769874572754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0338139681887362, "compression/movement_sparsity/importance_threshold": -0.001418423029981623, "compression/movement_sparsity/linear_layer_sparsity": 0.6061804081728436, "compression/movement_sparsity/model_sparsity": 0.585356246323049, "compression_loss": 111.18893432617188, "distillation_loss": 3.539640426635742, "epoch": 1.32, "learning_rate": 4.0544620214949883e-05, "loss": 114.8504, "step": 1566, "task_loss": 1.5691958665847778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0355556327550666, "compression/movement_sparsity/importance_threshold": -0.0014131238211619428, "compression/movement_sparsity/linear_layer_sparsity": 0.6077575343568657, "compression/movement_sparsity/model_sparsity": 0.5868791933708423, "compression_loss": 111.37535095214844, "distillation_loss": 5.6533203125, "epoch": 1.32, "learning_rate": 4.053858229682406e-05, "loss": 114.7147, "step": 1567, "task_loss": 2.867809772491455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0372929540204925, "compression/movement_sparsity/importance_threshold": -0.001407837827319994, "compression/movement_sparsity/linear_layer_sparsity": 0.6093499830963, "compression/movement_sparsity/model_sparsity": 0.5884169365971316, "compression_loss": 111.56130981445312, "distillation_loss": 3.4040427207946777, "epoch": 1.33, "learning_rate": 4.053254437869823e-05, "loss": 114.4516, "step": 1568, "task_loss": 2.158994674682617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.039025937407362, "compression/movement_sparsity/importance_threshold": -0.001402565031957677, "compression/movement_sparsity/linear_layer_sparsity": 0.6110209047829458, "compression/movement_sparsity/model_sparsity": 0.5900304569834848, "compression_loss": 111.74673461914062, "distillation_loss": 2.1351006031036377, "epoch": 1.33, "learning_rate": 4.052650646057239e-05, "loss": 115.3035, "step": 1569, "task_loss": 1.0629807710647583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0407545883380243, "compression/movement_sparsity/importance_threshold": -0.0013973054185768908, "compression/movement_sparsity/linear_layer_sparsity": 0.6126326587497825, "compression/movement_sparsity/model_sparsity": 0.5915868422432254, "compression_loss": 111.93167877197266, "distillation_loss": 4.234724998474121, "epoch": 1.33, "learning_rate": 4.0520468542446566e-05, "loss": 116.3498, "step": 1570, "task_loss": 2.031952142715454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0424789122348264, "compression/movement_sparsity/importance_threshold": -0.001392058970679537, "compression/movement_sparsity/linear_layer_sparsity": 0.6140838895719052, "compression/movement_sparsity/model_sparsity": 0.5929882188221001, "compression_loss": 112.11622619628906, "distillation_loss": 4.4984283447265625, "epoch": 1.33, "learning_rate": 4.051443062432074e-05, "loss": 115.949, "step": 1571, "task_loss": 2.239861011505127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0441989145201171, "compression/movement_sparsity/importance_threshold": -0.0013868256717675157, "compression/movement_sparsity/linear_layer_sparsity": 0.615532759408836, "compression/movement_sparsity/model_sparsity": 0.5943873155228874, "compression_loss": 112.30032348632812, "distillation_loss": 4.103102684020996, "epoch": 1.33, "learning_rate": 4.050839270619491e-05, "loss": 116.7342, "step": 1572, "task_loss": 2.0543394088745117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.045914600616245, "compression/movement_sparsity/importance_threshold": -0.0013816055053427262, "compression/movement_sparsity/linear_layer_sparsity": 0.61706463337668, "compression/movement_sparsity/model_sparsity": 0.5958665649073404, "compression_loss": 112.48394775390625, "distillation_loss": 3.718435525894165, "epoch": 1.33, "learning_rate": 4.0502354788069074e-05, "loss": 117.0921, "step": 1573, "task_loss": 2.3761038780212402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0476259759455573, "compression/movement_sparsity/importance_threshold": -0.0013763984549070717, "compression/movement_sparsity/linear_layer_sparsity": 0.6187296645245138, "compression/movement_sparsity/model_sparsity": 0.5974743971130111, "compression_loss": 112.66709899902344, "distillation_loss": 4.060519695281982, "epoch": 1.33, "learning_rate": 4.049631686994325e-05, "loss": 115.3242, "step": 1574, "task_loss": 1.791649580001831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0493330459304029, "compression/movement_sparsity/importance_threshold": -0.00137120450396245, "compression/movement_sparsity/linear_layer_sparsity": 0.6203383778286033, "compression/movement_sparsity/model_sparsity": 0.5990278461661241, "compression_loss": 112.84989166259766, "distillation_loss": 4.050065994262695, "epoch": 1.33, "learning_rate": 4.0490278951817415e-05, "loss": 117.2844, "step": 1575, "task_loss": 1.2940340042114258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0510358159931297, "compression/movement_sparsity/importance_threshold": -0.0013660236360107632, "compression/movement_sparsity/linear_layer_sparsity": 0.6218575764062505, "compression/movement_sparsity/model_sparsity": 0.6004948555990277, "compression_loss": 113.03218078613281, "distillation_loss": 3.8653383255004883, "epoch": 1.33, "learning_rate": 4.048424103369158e-05, "loss": 116.1084, "step": 1576, "task_loss": 1.5775467157363892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.052734291556086, "compression/movement_sparsity/importance_threshold": -0.0013608558345539114, "compression/movement_sparsity/linear_layer_sparsity": 0.6232577360183887, "compression/movement_sparsity/model_sparsity": 0.6018469154210943, "compression_loss": 113.21402740478516, "distillation_loss": 3.3859777450561523, "epoch": 1.33, "learning_rate": 4.0478203115565756e-05, "loss": 117.2089, "step": 1577, "task_loss": 1.5121915340423584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0544284780416202, "compression/movement_sparsity/importance_threshold": -0.001355701083093794, "compression/movement_sparsity/linear_layer_sparsity": 0.624745526338483, "compression/movement_sparsity/model_sparsity": 0.6032835955667148, "compression_loss": 113.39543151855469, "distillation_loss": 3.930778741836548, "epoch": 1.33, "learning_rate": 4.0472165197439924e-05, "loss": 117.133, "step": 1578, "task_loss": 2.0993504524230957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0561183808720802, "compression/movement_sparsity/importance_threshold": -0.0013505593651323127, "compression/movement_sparsity/linear_layer_sparsity": 0.626201693766007, "compression/movement_sparsity/model_sparsity": 0.6046897391634084, "compression_loss": 113.57635498046875, "distillation_loss": 3.901603937149048, "epoch": 1.33, "learning_rate": 4.046612727931409e-05, "loss": 116.7314, "step": 1579, "task_loss": 2.076174259185791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0578040054698143, "compression/movement_sparsity/importance_threshold": -0.0013454306641713684, "compression/movement_sparsity/linear_layer_sparsity": 0.627513709930981, "compression/movement_sparsity/model_sparsity": 0.6059566835368817, "compression_loss": 113.75682067871094, "distillation_loss": 5.026679039001465, "epoch": 1.34, "learning_rate": 4.0460089361188265e-05, "loss": 118.5587, "step": 1580, "task_loss": 1.9855003356933594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0594853572571705, "compression/movement_sparsity/importance_threshold": -0.00134031496371286, "compression/movement_sparsity/linear_layer_sparsity": 0.629108293096422, "compression/movement_sparsity/model_sparsity": 0.6074964878650783, "compression_loss": 113.93682861328125, "distillation_loss": 2.958275318145752, "epoch": 1.34, "learning_rate": 4.045405144306243e-05, "loss": 117.3651, "step": 1581, "task_loss": 1.7174173593521118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0611624416564975, "compression/movement_sparsity/importance_threshold": -0.001335212247258689, "compression/movement_sparsity/linear_layer_sparsity": 0.6307970175653482, "compression/movement_sparsity/model_sparsity": 0.6091271994533728, "compression_loss": 114.1164321899414, "distillation_loss": 4.417238235473633, "epoch": 1.34, "learning_rate": 4.0448013524936606e-05, "loss": 118.0716, "step": 1582, "task_loss": 2.5637683868408203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0628352640901428, "compression/movement_sparsity/importance_threshold": -0.001330122498310756, "compression/movement_sparsity/linear_layer_sparsity": 0.6321590079168842, "compression/movement_sparsity/model_sparsity": 0.610442401246361, "compression_loss": 114.29553985595703, "distillation_loss": 2.7364110946655273, "epoch": 1.34, "learning_rate": 4.044197560681077e-05, "loss": 117.8519, "step": 1583, "task_loss": 1.3970770835876465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0645038299804552, "compression/movement_sparsity/importance_threshold": -0.0013250457003709604, "compression/movement_sparsity/linear_layer_sparsity": 0.6334853092346859, "compression/movement_sparsity/model_sparsity": 0.6117231400337162, "compression_loss": 114.47422790527344, "distillation_loss": 3.7062671184539795, "epoch": 1.34, "learning_rate": 4.043593768868495e-05, "loss": 118.2151, "step": 1584, "task_loss": 3.64023756980896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0661681447497828, "compression/movement_sparsity/importance_threshold": -0.001319981836941203, "compression/movement_sparsity/linear_layer_sparsity": 0.6349112012005824, "compression/movement_sparsity/model_sparsity": 0.6131000482240274, "compression_loss": 114.65242767333984, "distillation_loss": 4.417819976806641, "epoch": 1.34, "learning_rate": 4.0429899770559114e-05, "loss": 118.1918, "step": 1585, "task_loss": 2.5193395614624023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0678282138204733, "compression/movement_sparsity/importance_threshold": -0.0013149308915233857, "compression/movement_sparsity/linear_layer_sparsity": 0.6364070403338309, "compression/movement_sparsity/model_sparsity": 0.6145445006813093, "compression_loss": 114.83014678955078, "distillation_loss": 5.502124786376953, "epoch": 1.34, "learning_rate": 4.042386185243328e-05, "loss": 118.9, "step": 1586, "task_loss": 2.3925704956054688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0694840426148755, "compression/movement_sparsity/importance_threshold": -0.0013098928476194065, "compression/movement_sparsity/linear_layer_sparsity": 0.6378704099586069, "compression/movement_sparsity/model_sparsity": 0.6159575990576227, "compression_loss": 115.00747680664062, "distillation_loss": 3.6127185821533203, "epoch": 1.34, "learning_rate": 4.0417823934307455e-05, "loss": 118.0272, "step": 1587, "task_loss": 1.549818992614746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0711356365553373, "compression/movement_sparsity/importance_threshold": -0.001304867688731168, "compression/movement_sparsity/linear_layer_sparsity": 0.6392611614024806, "compression/movement_sparsity/model_sparsity": 0.6173005739109474, "compression_loss": 115.18421936035156, "distillation_loss": 3.0144705772399902, "epoch": 1.34, "learning_rate": 4.041178601618162e-05, "loss": 118.5309, "step": 1588, "task_loss": 1.477476954460144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0727830010642068, "compression/movement_sparsity/importance_threshold": -0.0012998553983605702, "compression/movement_sparsity/linear_layer_sparsity": 0.6407908890201501, "compression/movement_sparsity/model_sparsity": 0.6187777506789575, "compression_loss": 115.36055755615234, "distillation_loss": 4.683080673217773, "epoch": 1.34, "learning_rate": 4.040574809805579e-05, "loss": 119.6105, "step": 1589, "task_loss": 1.9613977670669556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.074426141563833, "compression/movement_sparsity/importance_threshold": -0.0012948559600095113, "compression/movement_sparsity/linear_layer_sparsity": 0.6422824116047144, "compression/movement_sparsity/model_sparsity": 0.6202180348742818, "compression_loss": 115.5364761352539, "distillation_loss": 4.053908348083496, "epoch": 1.34, "learning_rate": 4.0399710179929964e-05, "loss": 119.3435, "step": 1590, "task_loss": 2.8004140853881836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0760650634765625, "compression/movement_sparsity/importance_threshold": -0.0012898693571798958, "compression/movement_sparsity/linear_layer_sparsity": 0.6436950796687028, "compression/movement_sparsity/model_sparsity": 0.6215821734443968, "compression_loss": 115.71187591552734, "distillation_loss": 3.4829654693603516, "epoch": 1.34, "learning_rate": 4.039367226180413e-05, "loss": 119.6533, "step": 1591, "task_loss": 2.3413448333740234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0776997722247448, "compression/movement_sparsity/importance_threshold": -0.0012848955733736215, "compression/movement_sparsity/linear_layer_sparsity": 0.6452024971687257, "compression/movement_sparsity/model_sparsity": 0.6230378065159353, "compression_loss": 115.8868179321289, "distillation_loss": 4.101276874542236, "epoch": 1.35, "learning_rate": 4.03876343436783e-05, "loss": 119.8203, "step": 1592, "task_loss": 2.8775336742401123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.079330273230728, "compression/movement_sparsity/importance_threshold": -0.0012799345920925884, "compression/movement_sparsity/linear_layer_sparsity": 0.6465461958471082, "compression/movement_sparsity/model_sparsity": 0.6243353450110147, "compression_loss": 116.0613021850586, "distillation_loss": 4.3211212158203125, "epoch": 1.35, "learning_rate": 4.038159642555247e-05, "loss": 120.0946, "step": 1593, "task_loss": 2.5605499744415283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0809565719168597, "compression/movement_sparsity/importance_threshold": -0.0012749863968386991, "compression/movement_sparsity/linear_layer_sparsity": 0.6479110837713795, "compression/movement_sparsity/model_sparsity": 0.625653344836201, "compression_loss": 116.23534393310547, "distillation_loss": 3.038931131362915, "epoch": 1.35, "learning_rate": 4.037555850742664e-05, "loss": 120.7138, "step": 1594, "task_loss": 1.8257532119750977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0825786737054883, "compression/movement_sparsity/importance_threshold": -0.0012700509711138525, "compression/movement_sparsity/linear_layer_sparsity": 0.6494545480301657, "compression/movement_sparsity/model_sparsity": 0.6271437863494465, "compression_loss": 116.40900421142578, "distillation_loss": 3.7389769554138184, "epoch": 1.35, "learning_rate": 4.036952058930081e-05, "loss": 120.3522, "step": 1595, "task_loss": 2.2965049743652344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0841965840189625, "compression/movement_sparsity/importance_threshold": -0.0012651282984199493, "compression/movement_sparsity/linear_layer_sparsity": 0.6510160058579143, "compression/movement_sparsity/model_sparsity": 0.6286516032972057, "compression_loss": 116.58209228515625, "distillation_loss": 4.013985633850098, "epoch": 1.35, "learning_rate": 4.036348267117498e-05, "loss": 120.2463, "step": 1596, "task_loss": 1.764885425567627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0858103082796298, "compression/movement_sparsity/importance_threshold": -0.0012602183622588909, "compression/movement_sparsity/linear_layer_sparsity": 0.652394213077435, "compression/movement_sparsity/model_sparsity": 0.6299824648588745, "compression_loss": 116.75476837158203, "distillation_loss": 3.6824915409088135, "epoch": 1.35, "learning_rate": 4.035744475304915e-05, "loss": 120.4639, "step": 1597, "task_loss": 2.8522896766662598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0874198519098388, "compression/movement_sparsity/importance_threshold": -0.0012553211461325759, "compression/movement_sparsity/linear_layer_sparsity": 0.6536660328733085, "compression/movement_sparsity/model_sparsity": 0.6312105937321845, "compression_loss": 116.92699432373047, "distillation_loss": 4.527782917022705, "epoch": 1.35, "learning_rate": 4.035140683492332e-05, "loss": 120.5602, "step": 1598, "task_loss": 2.5846996307373047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0890252203319377, "compression/movement_sparsity/importance_threshold": -0.0012504366335429062, "compression/movement_sparsity/linear_layer_sparsity": 0.6551264691528461, "compression/movement_sparsity/model_sparsity": 0.6326208595326924, "compression_loss": 117.09871673583984, "distillation_loss": 3.6654696464538574, "epoch": 1.35, "learning_rate": 4.034536891679749e-05, "loss": 121.0475, "step": 1599, "task_loss": 3.141796112060547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0906264189682742, "compression/movement_sparsity/importance_threshold": -0.001245564807991783, "compression/movement_sparsity/linear_layer_sparsity": 0.6563391450772457, "compression/movement_sparsity/model_sparsity": 0.6337918763084615, "compression_loss": 117.27008056640625, "distillation_loss": 4.536157608032227, "epoch": 1.35, "learning_rate": 4.033933099867166e-05, "loss": 120.5534, "step": 1600, "task_loss": 2.339637279510498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0922234532411974, "compression/movement_sparsity/importance_threshold": -0.001240705652981104, "compression/movement_sparsity/linear_layer_sparsity": 0.6577863574548751, "compression/movement_sparsity/model_sparsity": 0.6351893724887733, "compression_loss": 117.44084930419922, "distillation_loss": 5.000787734985352, "epoch": 1.35, "learning_rate": 4.033329308054583e-05, "loss": 122.1345, "step": 1601, "task_loss": 2.2831034660339355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0938163285730549, "compression/movement_sparsity/importance_threshold": -0.0012358591520127723, "compression/movement_sparsity/linear_layer_sparsity": 0.6591165460513262, "compression/movement_sparsity/model_sparsity": 0.6364738650147975, "compression_loss": 117.61127471923828, "distillation_loss": 3.8120532035827637, "epoch": 1.35, "learning_rate": 4.032725516242e-05, "loss": 121.8695, "step": 1602, "task_loss": 2.370018482208252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0954050503861945, "compression/movement_sparsity/importance_threshold": -0.0012310252885886878, "compression/movement_sparsity/linear_layer_sparsity": 0.6605452998174554, "compression/movement_sparsity/model_sparsity": 0.6378535366936994, "compression_loss": 117.7812271118164, "distillation_loss": 4.224034309387207, "epoch": 1.35, "learning_rate": 4.032121724429417e-05, "loss": 122.1887, "step": 1603, "task_loss": 1.872103214263916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.096989624102965, "compression/movement_sparsity/importance_threshold": -0.00122620404621075, "compression/movement_sparsity/linear_layer_sparsity": 0.6619422160559967, "compression/movement_sparsity/model_sparsity": 0.6392024645620298, "compression_loss": 117.95074462890625, "distillation_loss": 3.277021884918213, "epoch": 1.36, "learning_rate": 4.031517932616834e-05, "loss": 122.2191, "step": 1604, "task_loss": 1.7506492137908936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0985700551457152, "compression/movement_sparsity/importance_threshold": -0.0012213954083808592, "compression/movement_sparsity/linear_layer_sparsity": 0.6632956925518405, "compression/movement_sparsity/model_sparsity": 0.6405094449764608, "compression_loss": 118.11978912353516, "distillation_loss": 4.338192462921143, "epoch": 1.36, "learning_rate": 4.030914140804251e-05, "loss": 121.9303, "step": 1605, "task_loss": 2.8371849060058594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1001463489367915, "compression/movement_sparsity/importance_threshold": -0.0012165993586009181, "compression/movement_sparsity/linear_layer_sparsity": 0.6647438230903779, "compression/movement_sparsity/model_sparsity": 0.6419078277760288, "compression_loss": 118.28837585449219, "distillation_loss": 3.3354732990264893, "epoch": 1.36, "learning_rate": 4.030310348991668e-05, "loss": 121.9622, "step": 1606, "task_loss": 1.886362075805664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.101718510898544, "compression/movement_sparsity/importance_threshold": -0.0012118158803728243, "compression/movement_sparsity/linear_layer_sparsity": 0.6660084049164965, "compression/movement_sparsity/model_sparsity": 0.6431289673261115, "compression_loss": 118.45646667480469, "distillation_loss": 4.3371758460998535, "epoch": 1.36, "learning_rate": 4.0297065571790846e-05, "loss": 122.3046, "step": 1607, "task_loss": 2.660590887069702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1032865464533197, "compression/movement_sparsity/importance_threshold": -0.0012070449571984802, "compression/movement_sparsity/linear_layer_sparsity": 0.6675098126360309, "compression/movement_sparsity/model_sparsity": 0.6445787970716095, "compression_loss": 118.62409973144531, "distillation_loss": 4.052557945251465, "epoch": 1.36, "learning_rate": 4.029102765366502e-05, "loss": 122.1051, "step": 1608, "task_loss": 2.041536331176758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1048504610234668, "compression/movement_sparsity/importance_threshold": -0.001202286572579786, "compression/movement_sparsity/linear_layer_sparsity": 0.6688460944821439, "compression/movement_sparsity/model_sparsity": 0.6458691735254246, "compression_loss": 118.79136657714844, "distillation_loss": 4.164777755737305, "epoch": 1.36, "learning_rate": 4.028498973553919e-05, "loss": 122.6175, "step": 1609, "task_loss": 2.1523187160491943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1064102600313346, "compression/movement_sparsity/importance_threshold": -0.001197540710018641, "compression/movement_sparsity/linear_layer_sparsity": 0.6703787435208842, "compression/movement_sparsity/model_sparsity": 0.6473491713547044, "compression_loss": 118.9581298828125, "distillation_loss": 3.983055591583252, "epoch": 1.36, "learning_rate": 4.0278951817413355e-05, "loss": 122.874, "step": 1610, "task_loss": 2.0215349197387695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1079659488992701, "compression/movement_sparsity/importance_threshold": -0.001192807353016947, "compression/movement_sparsity/linear_layer_sparsity": 0.6719127042180646, "compression/movement_sparsity/model_sparsity": 0.6488304357829214, "compression_loss": 119.1243896484375, "distillation_loss": 6.733787536621094, "epoch": 1.36, "learning_rate": 4.027291389928753e-05, "loss": 123.9715, "step": 1611, "task_loss": 4.168168067932129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1095175330496219, "compression/movement_sparsity/importance_threshold": -0.001188086485076604, "compression/movement_sparsity/linear_layer_sparsity": 0.673421087575666, "compression/movement_sparsity/model_sparsity": 0.6502870015318593, "compression_loss": 119.29032135009766, "distillation_loss": 3.532302141189575, "epoch": 1.36, "learning_rate": 4.0266875981161696e-05, "loss": 123.0577, "step": 1612, "task_loss": 3.5787298679351807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1110650179047383, "compression/movement_sparsity/importance_threshold": -0.0011833780896995123, "compression/movement_sparsity/linear_layer_sparsity": 0.6749048356029317, "compression/movement_sparsity/model_sparsity": 0.6517197782498456, "compression_loss": 119.45575714111328, "distillation_loss": 2.62689208984375, "epoch": 1.36, "learning_rate": 4.026083806303586e-05, "loss": 122.7526, "step": 1613, "task_loss": 1.0241925716400146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1126084088869674, "compression/movement_sparsity/importance_threshold": -0.0011786821503875725, "compression/movement_sparsity/linear_layer_sparsity": 0.6764263832416031, "compression/movement_sparsity/model_sparsity": 0.6531890560463006, "compression_loss": 119.62071990966797, "distillation_loss": 3.7785258293151855, "epoch": 1.36, "learning_rate": 4.025480014491004e-05, "loss": 123.8019, "step": 1614, "task_loss": 2.802356243133545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1141477114186573, "compression/movement_sparsity/importance_threshold": -0.001173998650642685, "compression/movement_sparsity/linear_layer_sparsity": 0.6779000076505459, "compression/movement_sparsity/model_sparsity": 0.6546120569233973, "compression_loss": 119.78531646728516, "distillation_loss": 4.216497421264648, "epoch": 1.36, "learning_rate": 4.024876222678421e-05, "loss": 123.9349, "step": 1615, "task_loss": 2.1455159187316895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1156829309221568, "compression/movement_sparsity/importance_threshold": -0.00116932757396675, "compression/movement_sparsity/linear_layer_sparsity": 0.6792506342703248, "compression/movement_sparsity/model_sparsity": 0.6559162853637733, "compression_loss": 119.94945526123047, "distillation_loss": 2.9395642280578613, "epoch": 1.37, "learning_rate": 4.024272430865838e-05, "loss": 124.6031, "step": 1616, "task_loss": 1.4227871894836426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.117214072819813, "compression/movement_sparsity/importance_threshold": -0.0011646689038616689, "compression/movement_sparsity/linear_layer_sparsity": 0.6805866537847499, "compression/movement_sparsity/model_sparsity": 0.6572064084978011, "compression_loss": 120.11316680908203, "distillation_loss": 6.896747589111328, "epoch": 1.37, "learning_rate": 4.0236686390532545e-05, "loss": 124.8741, "step": 1617, "task_loss": 3.7542102336883545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.118741142533975, "compression/movement_sparsity/importance_threshold": -0.0011600226238293412, "compression/movement_sparsity/linear_layer_sparsity": 0.6820786175635167, "compression/movement_sparsity/model_sparsity": 0.6586471187309498, "compression_loss": 120.27645111083984, "distillation_loss": 5.756622314453125, "epoch": 1.37, "learning_rate": 4.023064847240672e-05, "loss": 124.8108, "step": 1618, "task_loss": 2.2962188720703125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1202641454869908, "compression/movement_sparsity/importance_threshold": -0.0011553887173716675, "compression/movement_sparsity/linear_layer_sparsity": 0.6834281113873703, "compression/movement_sparsity/model_sparsity": 0.6599502532904254, "compression_loss": 120.4393081665039, "distillation_loss": 6.1587629318237305, "epoch": 1.37, "learning_rate": 4.0224610554280886e-05, "loss": 124.2767, "step": 1619, "task_loss": 2.4162888526916504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1217830871012082, "compression/movement_sparsity/importance_threshold": -0.0011507671679905486, "compression/movement_sparsity/linear_layer_sparsity": 0.6848800338112158, "compression/movement_sparsity/model_sparsity": 0.6613522977123761, "compression_loss": 120.60175323486328, "distillation_loss": 5.484973907470703, "epoch": 1.37, "learning_rate": 4.0218572636155054e-05, "loss": 125.7443, "step": 1620, "task_loss": 2.0053982734680176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1232979727989758, "compression/movement_sparsity/importance_threshold": -0.001146157959187885, "compression/movement_sparsity/linear_layer_sparsity": 0.6860141533192304, "compression/movement_sparsity/model_sparsity": 0.6624474567263307, "compression_loss": 120.76371765136719, "distillation_loss": 4.485365867614746, "epoch": 1.37, "learning_rate": 4.021253471802923e-05, "loss": 125.1201, "step": 1621, "task_loss": 2.640087127685547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1248088080026417, "compression/movement_sparsity/importance_threshold": -0.0011415610744655766, "compression/movement_sparsity/linear_layer_sparsity": 0.6875402559899386, "compression/movement_sparsity/model_sparsity": 0.6639211330754592, "compression_loss": 120.92523956298828, "distillation_loss": 4.5597920417785645, "epoch": 1.37, "learning_rate": 4.0206496799903395e-05, "loss": 125.0075, "step": 1622, "task_loss": 2.3231732845306396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.126315598134554, "compression/movement_sparsity/importance_threshold": -0.001136976497325525, "compression/movement_sparsity/linear_layer_sparsity": 0.6889218377489003, "compression/movement_sparsity/model_sparsity": 0.6652552532507578, "compression_loss": 121.08631896972656, "distillation_loss": 4.363131523132324, "epoch": 1.37, "learning_rate": 4.020045888177756e-05, "loss": 125.8885, "step": 1623, "task_loss": 2.6924426555633545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.127818348617061, "compression/movement_sparsity/importance_threshold": -0.001132404211269629, "compression/movement_sparsity/linear_layer_sparsity": 0.6901701430861958, "compression/movement_sparsity/model_sparsity": 0.666460675459481, "compression_loss": 121.24699401855469, "distillation_loss": 5.69732666015625, "epoch": 1.37, "learning_rate": 4.0194420963651736e-05, "loss": 125.9492, "step": 1624, "task_loss": 3.3758645057678223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.129317064872511, "compression/movement_sparsity/importance_threshold": -0.0011278441997997898, "compression/movement_sparsity/linear_layer_sparsity": 0.6914210359678685, "compression/movement_sparsity/model_sparsity": 0.6676685963224714, "compression_loss": 121.40718078613281, "distillation_loss": 3.309131145477295, "epoch": 1.37, "learning_rate": 4.018838304552591e-05, "loss": 125.4112, "step": 1625, "task_loss": 1.5360796451568604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.130811752323252, "compression/movement_sparsity/importance_threshold": -0.001123296446417909, "compression/movement_sparsity/linear_layer_sparsity": 0.692645731453245, "compression/movement_sparsity/model_sparsity": 0.6688512197503214, "compression_loss": 121.56698608398438, "distillation_loss": 4.673027992248535, "epoch": 1.37, "learning_rate": 4.018234512740007e-05, "loss": 126.2882, "step": 1626, "task_loss": 1.935702919960022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1323024163916324, "compression/movement_sparsity/importance_threshold": -0.0011187609346258848, "compression/movement_sparsity/linear_layer_sparsity": 0.6940715041774652, "compression/movement_sparsity/model_sparsity": 0.6702280127952746, "compression_loss": 121.72631072998047, "distillation_loss": 4.488462924957275, "epoch": 1.38, "learning_rate": 4.0176307209274244e-05, "loss": 126.1917, "step": 1627, "task_loss": 2.3700146675109863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1337890625, "compression/movement_sparsity/importance_threshold": -0.00111423764792562, "compression/movement_sparsity/linear_layer_sparsity": 0.6952129928210787, "compression/movement_sparsity/model_sparsity": 0.6713302877923502, "compression_loss": 121.88512420654297, "distillation_loss": 5.591846942901611, "epoch": 1.38, "learning_rate": 4.017026929114842e-05, "loss": 126.0691, "step": 1628, "task_loss": 2.6662471294403076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1352716960707032, "compression/movement_sparsity/importance_threshold": -0.0011097265698190141, "compression/movement_sparsity/linear_layer_sparsity": 0.6965690687854672, "compression/movement_sparsity/model_sparsity": 0.6726397783755843, "compression_loss": 122.04363250732422, "distillation_loss": 4.409328460693359, "epoch": 1.38, "learning_rate": 4.016423137302258e-05, "loss": 126.66, "step": 1629, "task_loss": 2.1426570415496826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1367503225260904, "compression/movement_sparsity/importance_threshold": -0.0011052276838079663, "compression/movement_sparsity/linear_layer_sparsity": 0.6978997939694619, "compression/movement_sparsity/model_sparsity": 0.6739247890557193, "compression_loss": 122.20164489746094, "distillation_loss": 4.842893123626709, "epoch": 1.38, "learning_rate": 4.015819345489675e-05, "loss": 126.8245, "step": 1630, "task_loss": 2.280773639678955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1382249472885098, "compression/movement_sparsity/importance_threshold": -0.0011007409733943792, "compression/movement_sparsity/linear_layer_sparsity": 0.6993608503057166, "compression/movement_sparsity/model_sparsity": 0.6753356536120886, "compression_loss": 122.3591537475586, "distillation_loss": 5.322578430175781, "epoch": 1.38, "learning_rate": 4.0152155536770927e-05, "loss": 127.1086, "step": 1631, "task_loss": 2.8617260456085205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1396955757803091, "compression/movement_sparsity/importance_threshold": -0.001096266422080152, "compression/movement_sparsity/linear_layer_sparsity": 0.7008561170789185, "compression/movement_sparsity/model_sparsity": 0.6767795533716524, "compression_loss": 122.51630401611328, "distillation_loss": 3.590932846069336, "epoch": 1.38, "learning_rate": 4.0146117618645094e-05, "loss": 126.9574, "step": 1632, "task_loss": 2.191063642501831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1411622134238373, "compression/movement_sparsity/importance_threshold": -0.001091804013367185, "compression/movement_sparsity/linear_layer_sparsity": 0.702183968538513, "compression/movement_sparsity/model_sparsity": 0.6780617890486609, "compression_loss": 122.67301177978516, "distillation_loss": 5.580835342407227, "epoch": 1.38, "learning_rate": 4.014007970051926e-05, "loss": 127.1921, "step": 1633, "task_loss": 2.5486977100372314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1426248656414415, "compression/movement_sparsity/importance_threshold": -0.00108735373075738, "compression/movement_sparsity/linear_layer_sparsity": 0.703367227541355, "compression/movement_sparsity/model_sparsity": 0.6792043994646247, "compression_loss": 122.82927703857422, "distillation_loss": 6.5097198486328125, "epoch": 1.38, "learning_rate": 4.0134041782393435e-05, "loss": 127.2932, "step": 1634, "task_loss": 3.167710065841675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1440835378554708, "compression/movement_sparsity/importance_threshold": -0.0010829155577526356, "compression/movement_sparsity/linear_layer_sparsity": 0.7048051867648989, "compression/movement_sparsity/model_sparsity": 0.68059296036516, "compression_loss": 122.98515319824219, "distillation_loss": 4.602743148803711, "epoch": 1.38, "learning_rate": 4.01280038642676e-05, "loss": 127.9748, "step": 1635, "task_loss": 2.8524296283721924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1455382354882733, "compression/movement_sparsity/importance_threshold": -0.0010784894778548535, "compression/movement_sparsity/linear_layer_sparsity": 0.7059523274639719, "compression/movement_sparsity/model_sparsity": 0.6817006932522022, "compression_loss": 123.14057922363281, "distillation_loss": 4.733675003051758, "epoch": 1.38, "learning_rate": 4.012196594614177e-05, "loss": 127.5029, "step": 1636, "task_loss": 2.788015842437744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1469889639621966, "compression/movement_sparsity/importance_threshold": -0.0010740754745659346, "compression/movement_sparsity/linear_layer_sparsity": 0.7072163727025468, "compression/movement_sparsity/model_sparsity": 0.6829213146481741, "compression_loss": 123.29558563232422, "distillation_loss": 3.9853811264038086, "epoch": 1.38, "learning_rate": 4.011592802801594e-05, "loss": 127.6096, "step": 1637, "task_loss": 1.706752896308899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1484357286995897, "compression/movement_sparsity/importance_threshold": -0.0010696735313877777, "compression/movement_sparsity/linear_layer_sparsity": 0.7083849411308614, "compression/movement_sparsity/model_sparsity": 0.6840497391560391, "compression_loss": 123.45013427734375, "distillation_loss": 3.209531307220459, "epoch": 1.38, "learning_rate": 4.010989010989011e-05, "loss": 126.747, "step": 1638, "task_loss": 2.3288533687591553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1498785351228002, "compression/movement_sparsity/importance_threshold": -0.0010652836318222844, "compression/movement_sparsity/linear_layer_sparsity": 0.7097099665627261, "compression/movement_sparsity/model_sparsity": 0.6853292458880643, "compression_loss": 123.60420227050781, "distillation_loss": 5.037357807159424, "epoch": 1.39, "learning_rate": 4.010385219176428e-05, "loss": 128.1229, "step": 1639, "task_loss": 2.4419302940368652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1513173886541763, "compression/movement_sparsity/importance_threshold": -0.0010609057593713557, "compression/movement_sparsity/linear_layer_sparsity": 0.7110772989413628, "compression/movement_sparsity/model_sparsity": 0.6866496061930885, "compression_loss": 123.75798034667969, "distillation_loss": 4.979372024536133, "epoch": 1.39, "learning_rate": 4.009781427363845e-05, "loss": 128.3517, "step": 1640, "task_loss": 2.1684041023254395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1527522947160667, "compression/movement_sparsity/importance_threshold": -0.0010565398975368907, "compression/movement_sparsity/linear_layer_sparsity": 0.7123028887393121, "compression/movement_sparsity/model_sparsity": 0.687833093211123, "compression_loss": 123.91126251220703, "distillation_loss": 4.463488578796387, "epoch": 1.39, "learning_rate": 4.0091776355512625e-05, "loss": 128.0204, "step": 1641, "task_loss": 2.0553979873657227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1541832587308194, "compression/movement_sparsity/importance_threshold": -0.00105218602982079, "compression/movement_sparsity/linear_layer_sparsity": 0.7136189114246118, "compression/movement_sparsity/model_sparsity": 0.6891039064686233, "compression_loss": 124.06411743164062, "distillation_loss": 5.677126884460449, "epoch": 1.39, "learning_rate": 4.0085738437386786e-05, "loss": 128.5432, "step": 1642, "task_loss": 2.7731428146362305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.155610286120782, "compression/movement_sparsity/importance_threshold": -0.0010478441397249556, "compression/movement_sparsity/linear_layer_sparsity": 0.7148010853281989, "compression/movement_sparsity/model_sparsity": 0.6902454690618298, "compression_loss": 124.21653747558594, "distillation_loss": 5.455066680908203, "epoch": 1.39, "learning_rate": 4.007970051926096e-05, "loss": 129.4895, "step": 1643, "task_loss": 2.425184488296509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1570333823083034, "compression/movement_sparsity/importance_threshold": -0.0010435142107512868, "compression/movement_sparsity/linear_layer_sparsity": 0.7160801430938274, "compression/movement_sparsity/model_sparsity": 0.691480587258367, "compression_loss": 124.36854553222656, "distillation_loss": 4.8237128257751465, "epoch": 1.39, "learning_rate": 4.0073662601135134e-05, "loss": 129.3783, "step": 1644, "task_loss": 1.8762375116348267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1584525527157319, "compression/movement_sparsity/importance_threshold": -0.0010391962264016835, "compression/movement_sparsity/linear_layer_sparsity": 0.7171808868566292, "compression/movement_sparsity/model_sparsity": 0.6925435170866326, "compression_loss": 124.5201187133789, "distillation_loss": 4.7112345695495605, "epoch": 1.39, "learning_rate": 4.00676246830093e-05, "loss": 128.8596, "step": 1645, "task_loss": 2.30698299407959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1598678027654148, "compression/movement_sparsity/importance_threshold": -0.0010348901701780471, "compression/movement_sparsity/linear_layer_sparsity": 0.7185453097383627, "compression/movement_sparsity/model_sparsity": 0.693861067844923, "compression_loss": 124.67130279541016, "distillation_loss": 4.574435234069824, "epoch": 1.39, "learning_rate": 4.006158676488347e-05, "loss": 129.5713, "step": 1646, "task_loss": 1.7015719413757324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1612791378797014, "compression/movement_sparsity/importance_threshold": -0.0010305960255822771, "compression/movement_sparsity/linear_layer_sparsity": 0.7198698224310192, "compression/movement_sparsity/model_sparsity": 0.695140079451909, "compression_loss": 124.822021484375, "distillation_loss": 3.9105381965637207, "epoch": 1.39, "learning_rate": 4.005554884675764e-05, "loss": 129.2591, "step": 1647, "task_loss": 3.3302674293518066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.162686563480939, "compression/movement_sparsity/importance_threshold": -0.001026313776116276, "compression/movement_sparsity/linear_layer_sparsity": 0.7212080717647921, "compression/movement_sparsity/model_sparsity": 0.6964323558041303, "compression_loss": 124.97233581542969, "distillation_loss": 5.138538837432861, "epoch": 1.39, "learning_rate": 4.004951092863181e-05, "loss": 129.7413, "step": 1648, "task_loss": 2.8371078968048096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1640900849914761, "compression/movement_sparsity/importance_threshold": -0.0010220434052819424, "compression/movement_sparsity/linear_layer_sparsity": 0.7222304975945615, "compression/movement_sparsity/model_sparsity": 0.6974196581612975, "compression_loss": 125.1222915649414, "distillation_loss": 4.873781204223633, "epoch": 1.39, "learning_rate": 4.0043473010505977e-05, "loss": 129.6072, "step": 1649, "task_loss": 3.3788414001464844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1654897078336612, "compression/movement_sparsity/importance_threshold": -0.0010177848965811768, "compression/movement_sparsity/linear_layer_sparsity": 0.7234908701895046, "compression/movement_sparsity/model_sparsity": 0.6986367330802447, "compression_loss": 125.27172088623047, "distillation_loss": 3.5707449913024902, "epoch": 1.39, "learning_rate": 4.003743509238015e-05, "loss": 129.4218, "step": 1650, "task_loss": 1.7528709173202515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1668854374298419, "compression/movement_sparsity/importance_threshold": -0.0010135382335158807, "compression/movement_sparsity/linear_layer_sparsity": 0.7247468308424224, "compression/movement_sparsity/model_sparsity": 0.6998495476209478, "compression_loss": 125.4207992553711, "distillation_loss": 6.146169662475586, "epoch": 1.4, "learning_rate": 4.003139717425432e-05, "loss": 131.0971, "step": 1651, "task_loss": 2.7813305854797363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.168277279202367, "compression/movement_sparsity/importance_threshold": -0.001009303399587954, "compression/movement_sparsity/linear_layer_sparsity": 0.7260744318944965, "compression/movement_sparsity/model_sparsity": 0.7011315414927046, "compression_loss": 125.56947326660156, "distillation_loss": 4.3812785148620605, "epoch": 1.4, "learning_rate": 4.0025359256128485e-05, "loss": 129.5112, "step": 1652, "task_loss": 1.6023848056793213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1696652385735846, "compression/movement_sparsity/importance_threshold": -0.0010050803782992962, "compression/movement_sparsity/linear_layer_sparsity": 0.7270859709592143, "compression/movement_sparsity/model_sparsity": 0.7021083310786913, "compression_loss": 125.71769714355469, "distillation_loss": 4.6329240798950195, "epoch": 1.4, "learning_rate": 4.001932133800266e-05, "loss": 130.7312, "step": 1653, "task_loss": 2.0416886806488037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1710493209658424, "compression/movement_sparsity/importance_threshold": -0.00100086915315181, "compression/movement_sparsity/linear_layer_sparsity": 0.7281695081481177, "compression/movement_sparsity/model_sparsity": 0.7031546454318054, "compression_loss": 125.86560821533203, "distillation_loss": 6.298150539398193, "epoch": 1.4, "learning_rate": 4.0013283419876826e-05, "loss": 130.6777, "step": 1654, "task_loss": 3.371328115463257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.172429531801489, "compression/movement_sparsity/importance_threshold": -0.0009966697076473937, "compression/movement_sparsity/linear_layer_sparsity": 0.7292097009876645, "compression/movement_sparsity/model_sparsity": 0.7041591044473063, "compression_loss": 126.01304626464844, "distillation_loss": 6.301766395568848, "epoch": 1.4, "learning_rate": 4.0007245501751e-05, "loss": 131.1358, "step": 1655, "task_loss": 2.768242120742798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1738058765028727, "compression/movement_sparsity/importance_threshold": -0.0009924820252879486, "compression/movement_sparsity/linear_layer_sparsity": 0.730290543314682, "compression/movement_sparsity/model_sparsity": 0.7052028165153309, "compression_loss": 126.1600341796875, "distillation_loss": 5.068981170654297, "epoch": 1.4, "learning_rate": 4.000120758362517e-05, "loss": 131.0489, "step": 1656, "task_loss": 2.4785683155059814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1751783604923411, "compression/movement_sparsity/importance_threshold": -0.0009883060895753762, "compression/movement_sparsity/linear_layer_sparsity": 0.7314464602011349, "compression/movement_sparsity/model_sparsity": 0.7063190241007179, "compression_loss": 126.30661010742188, "distillation_loss": 4.9326324462890625, "epoch": 1.4, "learning_rate": 3.999516966549934e-05, "loss": 131.1656, "step": 1657, "task_loss": 2.522188663482666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.176546989192243, "compression/movement_sparsity/importance_threshold": -0.0009841418840115753, "compression/movement_sparsity/linear_layer_sparsity": 0.732678966016313, "compression/movement_sparsity/model_sparsity": 0.7075091895495133, "compression_loss": 126.4527359008789, "distillation_loss": 3.8941092491149902, "epoch": 1.4, "learning_rate": 3.998913174737351e-05, "loss": 131.3093, "step": 1658, "task_loss": 1.4228382110595703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1779117680249265, "compression/movement_sparsity/importance_threshold": -0.0009799893920984467, "compression/movement_sparsity/linear_layer_sparsity": 0.7334243457352309, "compression/movement_sparsity/model_sparsity": 0.70822896318203, "compression_loss": 126.59847259521484, "distillation_loss": 5.147817611694336, "epoch": 1.4, "learning_rate": 3.9983093829247675e-05, "loss": 131.7854, "step": 1659, "task_loss": 2.1113858222961426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1792727024127396, "compression/movement_sparsity/importance_threshold": -0.0009758485973378919, "compression/movement_sparsity/linear_layer_sparsity": 0.7345577974898578, "compression/movement_sparsity/model_sparsity": 0.7093234773819801, "compression_loss": 126.74385833740234, "distillation_loss": 5.70904541015625, "epoch": 1.4, "learning_rate": 3.997705591112185e-05, "loss": 132.1562, "step": 1660, "task_loss": 1.9991743564605713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1806297977780305, "compression/movement_sparsity/importance_threshold": -0.0009717194832318103, "compression/movement_sparsity/linear_layer_sparsity": 0.7356654214973855, "compression/movement_sparsity/model_sparsity": 0.7103930510973991, "compression_loss": 126.8887710571289, "distillation_loss": 4.492949485778809, "epoch": 1.4, "learning_rate": 3.9971017992996017e-05, "loss": 131.4506, "step": 1661, "task_loss": 2.9952688217163086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1819830595431478, "compression/movement_sparsity/importance_threshold": -0.0009676020332821025, "compression/movement_sparsity/linear_layer_sparsity": 0.7366837215651529, "compression/movement_sparsity/model_sparsity": 0.7113763694251813, "compression_loss": 127.03329467773438, "distillation_loss": 7.356082439422607, "epoch": 1.4, "learning_rate": 3.9964980074870184e-05, "loss": 133.021, "step": 1662, "task_loss": 3.834693670272827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.183332493130439, "compression/movement_sparsity/importance_threshold": -0.0009634962309906696, "compression/movement_sparsity/linear_layer_sparsity": 0.7376262793200973, "compression/movement_sparsity/model_sparsity": 0.7122865474215966, "compression_loss": 127.17744445800781, "distillation_loss": 5.107526779174805, "epoch": 1.41, "learning_rate": 3.995894215674436e-05, "loss": 132.149, "step": 1663, "task_loss": 2.1269724369049072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1846781039622531, "compression/movement_sparsity/importance_threshold": -0.0009594020598594107, "compression/movement_sparsity/linear_layer_sparsity": 0.7387805506714165, "compression/movement_sparsity/model_sparsity": 0.713401166001044, "compression_loss": 127.3211898803711, "distillation_loss": 4.4077911376953125, "epoch": 1.41, "learning_rate": 3.9952904238618525e-05, "loss": 132.5378, "step": 1664, "task_loss": 2.9023802280426025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1860198974609375, "compression/movement_sparsity/importance_threshold": -0.0009553195033902284, "compression/movement_sparsity/linear_layer_sparsity": 0.7400044472375614, "compression/movement_sparsity/model_sparsity": 0.7145830179549957, "compression_loss": 127.46450805664062, "distillation_loss": 6.424215316772461, "epoch": 1.41, "learning_rate": 3.99468663204927e-05, "loss": 133.0279, "step": 1665, "task_loss": 3.93650484085083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1873578790488408, "compression/movement_sparsity/importance_threshold": -0.0009512485450850218, "compression/movement_sparsity/linear_layer_sparsity": 0.7409059500833356, "compression/movement_sparsity/model_sparsity": 0.7154535514046703, "compression_loss": 127.60740661621094, "distillation_loss": 4.932514190673828, "epoch": 1.41, "learning_rate": 3.9940828402366866e-05, "loss": 132.3366, "step": 1666, "task_loss": 3.054093837738037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1886920541483113, "compression/movement_sparsity/importance_threshold": -0.0009471891684456907, "compression/movement_sparsity/linear_layer_sparsity": 0.741904146004469, "compression/movement_sparsity/model_sparsity": 0.7164174562251029, "compression_loss": 127.74990844726562, "distillation_loss": 4.859705924987793, "epoch": 1.41, "learning_rate": 3.993479048424103e-05, "loss": 132.7677, "step": 1667, "task_loss": 2.708770513534546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1900224281816971, "compression/movement_sparsity/importance_threshold": -0.0009431413569741371, "compression/movement_sparsity/linear_layer_sparsity": 0.7430379316357898, "compression/movement_sparsity/model_sparsity": 0.7175122928320552, "compression_loss": 127.89200592041016, "distillation_loss": 4.668491363525391, "epoch": 1.41, "learning_rate": 3.992875256611521e-05, "loss": 132.4724, "step": 1668, "task_loss": 1.7856543064117432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1913490065713461, "compression/movement_sparsity/importance_threshold": -0.0009391050941722602, "compression/movement_sparsity/linear_layer_sparsity": 0.744100541910492, "compression/movement_sparsity/model_sparsity": 0.71853839917485, "compression_loss": 128.03366088867188, "distillation_loss": 4.772624969482422, "epoch": 1.41, "learning_rate": 3.9922714647989374e-05, "loss": 132.6452, "step": 1669, "task_loss": 2.0295071601867676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.192671794739607, "compression/movement_sparsity/importance_threshold": -0.0009350803635419609, "compression/movement_sparsity/linear_layer_sparsity": 0.7450633230537469, "compression/movement_sparsity/model_sparsity": 0.7194681058239728, "compression_loss": 128.17494201660156, "distillation_loss": 5.829484462738037, "epoch": 1.41, "learning_rate": 3.991667672986354e-05, "loss": 133.6759, "step": 1670, "task_loss": 2.7783358097076416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1939907981088276, "compression/movement_sparsity/importance_threshold": -0.0009310671485851406, "compression/movement_sparsity/linear_layer_sparsity": 0.7462112507478837, "compression/movement_sparsity/model_sparsity": 0.7205765986703774, "compression_loss": 128.3157501220703, "distillation_loss": 4.5924072265625, "epoch": 1.41, "learning_rate": 3.9910638811737716e-05, "loss": 132.5307, "step": 1671, "task_loss": 2.5231821537017822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1953060221013563, "compression/movement_sparsity/importance_threshold": -0.0009270654328036982, "compression/movement_sparsity/linear_layer_sparsity": 0.7470638048855127, "compression/movement_sparsity/model_sparsity": 0.7213998649506155, "compression_loss": 128.45616149902344, "distillation_loss": 5.341917037963867, "epoch": 1.41, "learning_rate": 3.990460089361188e-05, "loss": 133.514, "step": 1672, "task_loss": 2.1828250885009766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1966174721395413, "compression/movement_sparsity/importance_threshold": -0.0009230751996995349, "compression/movement_sparsity/linear_layer_sparsity": 0.748172358978116, "compression/movement_sparsity/model_sparsity": 0.7224703367998265, "compression_loss": 128.59616088867188, "distillation_loss": 6.447425842285156, "epoch": 1.41, "learning_rate": 3.989856297548606e-05, "loss": 134.2175, "step": 1673, "task_loss": 3.79872727394104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1979251536457305, "compression/movement_sparsity/importance_threshold": -0.0009190964327745518, "compression/movement_sparsity/linear_layer_sparsity": 0.7492809488432222, "compression/movement_sparsity/model_sparsity": 0.723540843192645, "compression_loss": 128.7356719970703, "distillation_loss": 3.91593074798584, "epoch": 1.41, "learning_rate": 3.9892525057360224e-05, "loss": 132.9877, "step": 1674, "task_loss": 1.5377082824707031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1992290720422725, "compression/movement_sparsity/importance_threshold": -0.0009151291155306482, "compression/movement_sparsity/linear_layer_sparsity": 0.7503076912216758, "compression/movement_sparsity/model_sparsity": 0.7245323138117697, "compression_loss": 128.87486267089844, "distillation_loss": 5.185428142547607, "epoch": 1.42, "learning_rate": 3.98864871392344e-05, "loss": 133.8421, "step": 1675, "task_loss": 2.693092107772827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2005292327515154, "compression/movement_sparsity/importance_threshold": -0.0009111732314697248, "compression/movement_sparsity/linear_layer_sparsity": 0.7512944160935435, "compression/movement_sparsity/model_sparsity": 0.725485141648768, "compression_loss": 129.0135498046875, "distillation_loss": 4.279391765594482, "epoch": 1.42, "learning_rate": 3.9880449221108565e-05, "loss": 133.8896, "step": 1676, "task_loss": 2.457597255706787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2018256411958068, "compression/movement_sparsity/importance_threshold": -0.0009072287640936832, "compression/movement_sparsity/linear_layer_sparsity": 0.7524503806766669, "compression/movement_sparsity/model_sparsity": 0.7266013952922982, "compression_loss": 129.1519317626953, "distillation_loss": 6.514693737030029, "epoch": 1.42, "learning_rate": 3.987441130298273e-05, "loss": 134.5309, "step": 1677, "task_loss": 2.244901657104492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2031183027974959, "compression/movement_sparsity/importance_threshold": -0.0009032956969044219, "compression/movement_sparsity/linear_layer_sparsity": 0.7535459016540442, "compression/movement_sparsity/model_sparsity": 0.7276592817538858, "compression_loss": 129.28981018066406, "distillation_loss": 7.346892356872559, "epoch": 1.42, "learning_rate": 3.9868373384856906e-05, "loss": 134.6019, "step": 1678, "task_loss": 3.440413236618042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2044072229789304, "compression/movement_sparsity/importance_threshold": -0.0008993740134038428, "compression/movement_sparsity/linear_layer_sparsity": 0.7546157618226691, "compression/movement_sparsity/model_sparsity": 0.7286923889344435, "compression_loss": 129.4272918701172, "distillation_loss": 3.9865946769714355, "epoch": 1.42, "learning_rate": 3.986233546673107e-05, "loss": 134.5362, "step": 1679, "task_loss": 2.1240487098693848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2056924071624582, "compression/movement_sparsity/importance_threshold": -0.0008954636970938466, "compression/movement_sparsity/linear_layer_sparsity": 0.7556085322475282, "compression/movement_sparsity/model_sparsity": 0.7296510546410897, "compression_loss": 129.5644073486328, "distillation_loss": 4.212987899780273, "epoch": 1.42, "learning_rate": 3.985629754860524e-05, "loss": 133.9234, "step": 1680, "task_loss": 2.631201982498169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.206973860770428, "compression/movement_sparsity/importance_threshold": -0.0008915647314763327, "compression/movement_sparsity/linear_layer_sparsity": 0.7564401475467886, "compression/movement_sparsity/model_sparsity": 0.7304541013964725, "compression_loss": 129.7010955810547, "distillation_loss": 4.671471118927002, "epoch": 1.42, "learning_rate": 3.9850259630479414e-05, "loss": 134.717, "step": 1681, "task_loss": 2.4621150493621826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2082515892251877, "compression/movement_sparsity/importance_threshold": -0.0008876771000532017, "compression/movement_sparsity/linear_layer_sparsity": 0.7574919545256127, "compression/movement_sparsity/model_sparsity": 0.7314697755698374, "compression_loss": 129.83741760253906, "distillation_loss": 6.7365570068359375, "epoch": 1.42, "learning_rate": 3.984422171235358e-05, "loss": 136.1288, "step": 1682, "task_loss": 2.460587978363037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2095255979490855, "compression/movement_sparsity/importance_threshold": -0.0008838007863263553, "compression/movement_sparsity/linear_layer_sparsity": 0.7585072616273035, "compression/movement_sparsity/model_sparsity": 0.7324502037491352, "compression_loss": 129.97329711914062, "distillation_loss": 7.12745475769043, "epoch": 1.42, "learning_rate": 3.983818379422775e-05, "loss": 135.8651, "step": 1683, "task_loss": 3.2907121181488037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2107958923644702, "compression/movement_sparsity/importance_threshold": -0.0008799357737976918, "compression/movement_sparsity/linear_layer_sparsity": 0.7595094282962597, "compression/movement_sparsity/model_sparsity": 0.7334179429099873, "compression_loss": 130.10877990722656, "distillation_loss": 5.668455123901367, "epoch": 1.42, "learning_rate": 3.983214587610192e-05, "loss": 134.9896, "step": 1684, "task_loss": 3.0703577995300293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2120624778936888, "compression/movement_sparsity/importance_threshold": -0.0008760820459691137, "compression/movement_sparsity/linear_layer_sparsity": 0.7604597963810056, "compression/movement_sparsity/model_sparsity": 0.7343356629273481, "compression_loss": 130.24391174316406, "distillation_loss": 6.114388942718506, "epoch": 1.42, "learning_rate": 3.98261079579761e-05, "loss": 135.7736, "step": 1685, "task_loss": 2.9886763095855713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2133253599590907, "compression/movement_sparsity/importance_threshold": -0.0008722395863425205, "compression/movement_sparsity/linear_layer_sparsity": 0.7614440052535021, "compression/movement_sparsity/model_sparsity": 0.7352860611972937, "compression_loss": 130.37860107421875, "distillation_loss": 6.420416831970215, "epoch": 1.42, "learning_rate": 3.982007003985026e-05, "loss": 136.2085, "step": 1686, "task_loss": 4.890620708465576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2145845439830232, "compression/movement_sparsity/importance_threshold": -0.000868408378419813, "compression/movement_sparsity/linear_layer_sparsity": 0.762181193069254, "compression/movement_sparsity/model_sparsity": 0.7359979243437196, "compression_loss": 130.51290893554688, "distillation_loss": 4.34592342376709, "epoch": 1.43, "learning_rate": 3.981403212172443e-05, "loss": 136.002, "step": 1687, "task_loss": 2.0691323280334473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.215840035387835, "compression/movement_sparsity/importance_threshold": -0.0008645884057028913, "compression/movement_sparsity/linear_layer_sparsity": 0.7632171051326196, "compression/movement_sparsity/model_sparsity": 0.7369982496408702, "compression_loss": 130.64688110351562, "distillation_loss": 4.932995796203613, "epoch": 1.43, "learning_rate": 3.9807994203598605e-05, "loss": 136.405, "step": 1688, "task_loss": 2.7900893688201904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.217091839595874, "compression/movement_sparsity/importance_threshold": -0.000860779651693656, "compression/movement_sparsity/linear_layer_sparsity": 0.7642276306430885, "compression/movement_sparsity/model_sparsity": 0.7379740604913144, "compression_loss": 130.7803955078125, "distillation_loss": 4.772183418273926, "epoch": 1.43, "learning_rate": 3.980195628547277e-05, "loss": 135.5182, "step": 1689, "task_loss": 1.9071472883224487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2183399620294888, "compression/movement_sparsity/importance_threshold": -0.000856982099894008, "compression/movement_sparsity/linear_layer_sparsity": 0.7652892750602123, "compression/movement_sparsity/model_sparsity": 0.7389992341567098, "compression_loss": 130.91357421875, "distillation_loss": 3.9670186042785645, "epoch": 1.43, "learning_rate": 3.979591836734694e-05, "loss": 136.0866, "step": 1690, "task_loss": 2.5799996852874756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.219584408111027, "compression/movement_sparsity/importance_threshold": -0.0008531957338058474, "compression/movement_sparsity/linear_layer_sparsity": 0.766157426009109, "compression/movement_sparsity/model_sparsity": 0.7398375614497671, "compression_loss": 131.04637145996094, "distillation_loss": 6.1601104736328125, "epoch": 1.43, "learning_rate": 3.9789880449221113e-05, "loss": 136.4336, "step": 1691, "task_loss": 3.499772310256958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2208251832628374, "compression/movement_sparsity/importance_threshold": -0.0008494205369310743, "compression/movement_sparsity/linear_layer_sparsity": 0.7672088514145687, "compression/movement_sparsity/model_sparsity": 0.7408528671579865, "compression_loss": 131.17872619628906, "distillation_loss": 5.559735298156738, "epoch": 1.43, "learning_rate": 3.978384253109528e-05, "loss": 135.6633, "step": 1692, "task_loss": 2.7942800521850586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2220622929072678, "compression/movement_sparsity/importance_threshold": -0.0008456564927715898, "compression/movement_sparsity/linear_layer_sparsity": 0.7682637944494812, "compression/movement_sparsity/model_sparsity": 0.7418715696542654, "compression_loss": 131.31069946289062, "distillation_loss": 9.711210250854492, "epoch": 1.43, "learning_rate": 3.977780461296945e-05, "loss": 137.2612, "step": 1693, "task_loss": 4.893489360809326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2232957424666666, "compression/movement_sparsity/importance_threshold": -0.0008419035848292937, "compression/movement_sparsity/linear_layer_sparsity": 0.7690264045906329, "compression/movement_sparsity/model_sparsity": 0.7426079817910052, "compression_loss": 131.44224548339844, "distillation_loss": 6.687668323516846, "epoch": 1.43, "learning_rate": 3.977176669484362e-05, "loss": 136.9284, "step": 1694, "task_loss": 1.8430663347244263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2245255373633819, "compression/movement_sparsity/importance_threshold": -0.0008381617966060871, "compression/movement_sparsity/linear_layer_sparsity": 0.7700698885004472, "compression/movement_sparsity/model_sparsity": 0.7436156188183854, "compression_loss": 131.57342529296875, "distillation_loss": 4.849740982055664, "epoch": 1.43, "learning_rate": 3.976572877671779e-05, "loss": 137.1065, "step": 1695, "task_loss": 2.328639030456543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.225751683019762, "compression/movement_sparsity/importance_threshold": -0.0008344311116038699, "compression/movement_sparsity/linear_layer_sparsity": 0.7709610530928811, "compression/movement_sparsity/model_sparsity": 0.7444761691655263, "compression_loss": 131.70416259765625, "distillation_loss": 6.2563958168029785, "epoch": 1.43, "learning_rate": 3.9759690858591956e-05, "loss": 136.7041, "step": 1696, "task_loss": 3.334831953048706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2269741848581548, "compression/movement_sparsity/importance_threshold": -0.0008307115133245435, "compression/movement_sparsity/linear_layer_sparsity": 0.7718619597302734, "compression/movement_sparsity/model_sparsity": 0.7453461268884112, "compression_loss": 131.8345184326172, "distillation_loss": 5.515175819396973, "epoch": 1.43, "learning_rate": 3.975365294046613e-05, "loss": 136.3162, "step": 1697, "task_loss": 3.497478485107422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.228193048300909, "compression/movement_sparsity/importance_threshold": -0.0008270029852700065, "compression/movement_sparsity/linear_layer_sparsity": 0.7728944972541981, "compression/movement_sparsity/model_sparsity": 0.7463431935719321, "compression_loss": 131.9645233154297, "distillation_loss": 5.253702163696289, "epoch": 1.44, "learning_rate": 3.9747615022340304e-05, "loss": 137.0365, "step": 1698, "task_loss": 3.289030075073242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2294082787703722, "compression/movement_sparsity/importance_threshold": -0.0008233055109421617, "compression/movement_sparsity/linear_layer_sparsity": 0.7737714720871453, "compression/movement_sparsity/model_sparsity": 0.7471900416214774, "compression_loss": 132.0940704345703, "distillation_loss": 5.983340740203857, "epoch": 1.44, "learning_rate": 3.9741577104214464e-05, "loss": 137.0967, "step": 1699, "task_loss": 3.4343206882476807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.230619881688893, "compression/movement_sparsity/importance_threshold": -0.0008196190738429085, "compression/movement_sparsity/linear_layer_sparsity": 0.7746836232146183, "compression/movement_sparsity/model_sparsity": 0.7480708575516166, "compression_loss": 132.2232208251953, "distillation_loss": 8.211292266845703, "epoch": 1.44, "learning_rate": 3.973553918608864e-05, "loss": 138.5958, "step": 1700, "task_loss": 3.1914620399475098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2318278624788197, "compression/movement_sparsity/importance_threshold": -0.0008159436574741458, "compression/movement_sparsity/linear_layer_sparsity": 0.7755221783794427, "compression/movement_sparsity/model_sparsity": 0.7488806057668319, "compression_loss": 132.35198974609375, "distillation_loss": 4.689528465270996, "epoch": 1.44, "learning_rate": 3.972950126796281e-05, "loss": 137.5942, "step": 1701, "task_loss": 2.8297510147094727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2330322265625, "compression/movement_sparsity/importance_threshold": -0.000812279245337777, "compression/movement_sparsity/linear_layer_sparsity": 0.7763946339528558, "compression/movement_sparsity/model_sparsity": 0.7497230898073111, "compression_loss": 132.48025512695312, "distillation_loss": 4.332784175872803, "epoch": 1.44, "learning_rate": 3.972346334983697e-05, "loss": 137.2358, "step": 1702, "task_loss": 1.761114239692688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2342329793622824, "compression/movement_sparsity/importance_threshold": -0.0008086258209357008, "compression/movement_sparsity/linear_layer_sparsity": 0.7773658693309645, "compression/movement_sparsity/model_sparsity": 0.7506609602623122, "compression_loss": 132.60812377929688, "distillation_loss": 6.659272193908691, "epoch": 1.44, "learning_rate": 3.971742543171115e-05, "loss": 138.7303, "step": 1703, "task_loss": 3.077975034713745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2354301263005154, "compression/movement_sparsity/importance_threshold": -0.0008049833677698172, "compression/movement_sparsity/linear_layer_sparsity": 0.7781342507692519, "compression/movement_sparsity/model_sparsity": 0.7514029454343766, "compression_loss": 132.73570251464844, "distillation_loss": 4.140135765075684, "epoch": 1.44, "learning_rate": 3.971138751358532e-05, "loss": 137.6002, "step": 1704, "task_loss": 2.381814479827881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2366236727995465, "compression/movement_sparsity/importance_threshold": -0.0008013518693420285, "compression/movement_sparsity/linear_layer_sparsity": 0.7790292907161674, "compression/movement_sparsity/model_sparsity": 0.7522672380056507, "compression_loss": 132.86288452148438, "distillation_loss": 6.575408935546875, "epoch": 1.44, "learning_rate": 3.970534959545949e-05, "loss": 138.1842, "step": 1705, "task_loss": 4.056782245635986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2378136242817244, "compression/movement_sparsity/importance_threshold": -0.0007977313091542338, "compression/movement_sparsity/linear_layer_sparsity": 0.7799712164902272, "compression/movement_sparsity/model_sparsity": 0.7531768057316688, "compression_loss": 132.98959350585938, "distillation_loss": 4.539612770080566, "epoch": 1.44, "learning_rate": 3.9699311677333655e-05, "loss": 137.9326, "step": 1706, "task_loss": 2.8210482597351074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2389999861693972, "compression/movement_sparsity/importance_threshold": -0.0007941216707083336, "compression/movement_sparsity/linear_layer_sparsity": 0.7807475513483276, "compression/movement_sparsity/model_sparsity": 0.7539264710991082, "compression_loss": 133.11599731445312, "distillation_loss": 6.816411972045898, "epoch": 1.44, "learning_rate": 3.969327375920783e-05, "loss": 137.8192, "step": 1707, "task_loss": 3.2334794998168945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.240182763884913, "compression/movement_sparsity/importance_threshold": -0.000790522937506229, "compression/movement_sparsity/linear_layer_sparsity": 0.7816695876107708, "compression/movement_sparsity/model_sparsity": 0.754816832579421, "compression_loss": 133.24200439453125, "distillation_loss": 6.478850364685059, "epoch": 1.44, "learning_rate": 3.9687235841081996e-05, "loss": 138.5162, "step": 1708, "task_loss": 2.6535329818725586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.24136196285062, "compression/movement_sparsity/importance_threshold": -0.0007869350930498195, "compression/movement_sparsity/linear_layer_sparsity": 0.782560346781505, "compression/movement_sparsity/model_sparsity": 0.7556769914323448, "compression_loss": 133.36756896972656, "distillation_loss": 5.8360419273376465, "epoch": 1.44, "learning_rate": 3.968119792295616e-05, "loss": 138.6459, "step": 1709, "task_loss": 2.3818163871765137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2425375884888668, "compression/movement_sparsity/importance_threshold": -0.0007833581208410065, "compression/movement_sparsity/linear_layer_sparsity": 0.7835102736720484, "compression/movement_sparsity/model_sparsity": 0.7565942854118811, "compression_loss": 133.49282836914062, "distillation_loss": 5.689027309417725, "epoch": 1.45, "learning_rate": 3.967516000483034e-05, "loss": 139.0112, "step": 1710, "task_loss": 2.6477861404418945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2437096462220008, "compression/movement_sparsity/importance_threshold": -0.0007797920043816904, "compression/movement_sparsity/linear_layer_sparsity": 0.7845658725361808, "compression/movement_sparsity/model_sparsity": 0.7576136212076287, "compression_loss": 133.61770629882812, "distillation_loss": 6.724252700805664, "epoch": 1.45, "learning_rate": 3.9669122086704504e-05, "loss": 139.1362, "step": 1711, "task_loss": 3.3332371711730957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2448781414723709, "compression/movement_sparsity/importance_threshold": -0.0007762367271737704, "compression/movement_sparsity/linear_layer_sparsity": 0.7854554631384867, "compression/movement_sparsity/model_sparsity": 0.7584726516360445, "compression_loss": 133.74209594726562, "distillation_loss": 6.169949531555176, "epoch": 1.45, "learning_rate": 3.966308416857867e-05, "loss": 138.9246, "step": 1712, "task_loss": 3.340078592300415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.246043079662325, "compression/movement_sparsity/importance_threshold": -0.0007726922727191486, "compression/movement_sparsity/linear_layer_sparsity": 0.7863619622105003, "compression/movement_sparsity/model_sparsity": 0.7593480096762171, "compression_loss": 133.86624145507812, "distillation_loss": 7.8686299324035645, "epoch": 1.45, "learning_rate": 3.9657046250452846e-05, "loss": 139.8936, "step": 1713, "task_loss": 4.0748090744018555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.247204466214211, "compression/movement_sparsity/importance_threshold": -0.0007691586245197253, "compression/movement_sparsity/linear_layer_sparsity": 0.7873103508834186, "compression/movement_sparsity/model_sparsity": 0.7602638182806359, "compression_loss": 133.9898223876953, "distillation_loss": 6.3871893882751465, "epoch": 1.45, "learning_rate": 3.965100833232702e-05, "loss": 139.7479, "step": 1714, "task_loss": 3.305750608444214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2483623065503777, "compression/movement_sparsity/importance_threshold": -0.0007656357660773996, "compression/movement_sparsity/linear_layer_sparsity": 0.7882580598787817, "compression/movement_sparsity/model_sparsity": 0.7611789705565144, "compression_loss": 134.11309814453125, "distillation_loss": 5.852474212646484, "epoch": 1.45, "learning_rate": 3.964497041420119e-05, "loss": 139.2029, "step": 1715, "task_loss": 2.626415491104126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2495166060931733, "compression/movement_sparsity/importance_threshold": -0.0007621236808940729, "compression/movement_sparsity/linear_layer_sparsity": 0.7890454126677778, "compression/movement_sparsity/model_sparsity": 0.7619392753550279, "compression_loss": 134.2360076904297, "distillation_loss": 4.506422996520996, "epoch": 1.45, "learning_rate": 3.9638932496075354e-05, "loss": 140.0402, "step": 1716, "task_loss": 1.7811309099197388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2506673702649453, "compression/movement_sparsity/importance_threshold": -0.0007586223524716461, "compression/movement_sparsity/linear_layer_sparsity": 0.7899333815832853, "compression/movement_sparsity/model_sparsity": 0.7627967398065758, "compression_loss": 134.35855102539062, "distillation_loss": 5.711544036865234, "epoch": 1.45, "learning_rate": 3.963289457794953e-05, "loss": 140.2148, "step": 1717, "task_loss": 3.5613555908203125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2518146044880425, "compression/movement_sparsity/importance_threshold": -0.0007551317643120182, "compression/movement_sparsity/linear_layer_sparsity": 0.7907794728220555, "compression/movement_sparsity/model_sparsity": 0.7636137652084133, "compression_loss": 134.480712890625, "distillation_loss": 5.139287948608398, "epoch": 1.45, "learning_rate": 3.9626856659823695e-05, "loss": 140.1447, "step": 1718, "task_loss": 3.3396553993225098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2529583141848128, "compression/movement_sparsity/importance_threshold": -0.0007516518999170912, "compression/movement_sparsity/linear_layer_sparsity": 0.7914692501472873, "compression/movement_sparsity/model_sparsity": 0.7642798465605201, "compression_loss": 134.6024932861328, "distillation_loss": 7.168874740600586, "epoch": 1.45, "learning_rate": 3.962081874169786e-05, "loss": 141.2132, "step": 1719, "task_loss": 3.6089367866516113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2540985047776043, "compression/movement_sparsity/importance_threshold": -0.0007481827427887654, "compression/movement_sparsity/linear_layer_sparsity": 0.7924117840538965, "compression/movement_sparsity/model_sparsity": 0.7651900015278638, "compression_loss": 134.72389221191406, "distillation_loss": 5.392602920532227, "epoch": 1.45, "learning_rate": 3.9614780823572036e-05, "loss": 140.2986, "step": 1720, "task_loss": 2.903820514678955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.255235181688766, "compression/movement_sparsity/importance_threshold": -0.0007447242764289396, "compression/movement_sparsity/linear_layer_sparsity": 0.7931953568817519, "compression/movement_sparsity/model_sparsity": 0.7659466562185304, "compression_loss": 134.84487915039062, "distillation_loss": 5.348937034606934, "epoch": 1.45, "learning_rate": 3.9608742905446203e-05, "loss": 140.2073, "step": 1721, "task_loss": 2.669987201690674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2563683503406449, "compression/movement_sparsity/importance_threshold": -0.0007412764843395168, "compression/movement_sparsity/linear_layer_sparsity": 0.7940093721095818, "compression/movement_sparsity/model_sparsity": 0.7667327075190805, "compression_loss": 134.96548461914062, "distillation_loss": 4.224820137023926, "epoch": 1.46, "learning_rate": 3.960270498732037e-05, "loss": 140.2545, "step": 1722, "task_loss": 2.1910085678100586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.25749801615559, "compression/movement_sparsity/importance_threshold": -0.0007378393500223957, "compression/movement_sparsity/linear_layer_sparsity": 0.7949919473711121, "compression/movement_sparsity/model_sparsity": 0.7676815282976223, "compression_loss": 135.08578491210938, "distillation_loss": 7.336986064910889, "epoch": 1.46, "learning_rate": 3.9596667069194545e-05, "loss": 140.5594, "step": 1723, "task_loss": 3.3643126487731934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2586241845559494, "compression/movement_sparsity/importance_threshold": -0.0007344128569794768, "compression/movement_sparsity/linear_layer_sparsity": 0.7957787397242293, "compression/movement_sparsity/model_sparsity": 0.7684412919129535, "compression_loss": 135.20565795898438, "distillation_loss": 6.3377556800842285, "epoch": 1.46, "learning_rate": 3.959062915106871e-05, "loss": 141.442, "step": 1724, "task_loss": 2.8359532356262207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.259746860964071, "compression/movement_sparsity/importance_threshold": -0.000730996988712661, "compression/movement_sparsity/linear_layer_sparsity": 0.7964351890009188, "compression/movement_sparsity/model_sparsity": 0.7690751901375146, "compression_loss": 135.32522583007812, "distillation_loss": 6.394156455993652, "epoch": 1.46, "learning_rate": 3.9584591232942886e-05, "loss": 141.6865, "step": 1725, "task_loss": 3.4933269023895264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.260866050802303, "compression/movement_sparsity/importance_threshold": -0.0007275917287238491, "compression/movement_sparsity/linear_layer_sparsity": 0.7973268305600582, "compression/movement_sparsity/model_sparsity": 0.7699362010660871, "compression_loss": 135.44436645507812, "distillation_loss": 4.957626819610596, "epoch": 1.46, "learning_rate": 3.957855331481705e-05, "loss": 140.783, "step": 1726, "task_loss": 3.05924916267395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2619817594929943, "compression/movement_sparsity/importance_threshold": -0.0007241970605149405, "compression/movement_sparsity/linear_layer_sparsity": 0.7982064525582204, "compression/movement_sparsity/model_sparsity": 0.7707856053425788, "compression_loss": 135.5631866455078, "distillation_loss": 5.330682754516602, "epoch": 1.46, "learning_rate": 3.957251539669122e-05, "loss": 141.2486, "step": 1727, "task_loss": 2.1685941219329834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.263093992458492, "compression/movement_sparsity/importance_threshold": -0.0007208129675878375, "compression/movement_sparsity/linear_layer_sparsity": 0.7988592530396135, "compression/movement_sparsity/model_sparsity": 0.7714159801191868, "compression_loss": 135.68154907226562, "distillation_loss": 8.086620330810547, "epoch": 1.46, "learning_rate": 3.9566477478565394e-05, "loss": 142.455, "step": 1728, "task_loss": 3.685267210006714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2642027551211452, "compression/movement_sparsity/importance_threshold": -0.0007174394334444382, "compression/movement_sparsity/linear_layer_sparsity": 0.7996886742920288, "compression/movement_sparsity/model_sparsity": 0.7722169081999833, "compression_loss": 135.79966735839844, "distillation_loss": 6.91874885559082, "epoch": 1.46, "learning_rate": 3.956043956043956e-05, "loss": 141.5063, "step": 1729, "task_loss": 3.3561720848083496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2653080529033018, "compression/movement_sparsity/importance_threshold": -0.0007140764415866445, "compression/movement_sparsity/linear_layer_sparsity": 0.8004280919271286, "compression/movement_sparsity/model_sparsity": 0.7729309245646029, "compression_loss": 135.91737365722656, "distillation_loss": 7.179739952087402, "epoch": 1.46, "learning_rate": 3.9554401642313735e-05, "loss": 141.1135, "step": 1730, "task_loss": 4.365039348602295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2664098912273096, "compression/movement_sparsity/importance_threshold": -0.0007107239755163573, "compression/movement_sparsity/linear_layer_sparsity": 0.8012194870089533, "compression/movement_sparsity/model_sparsity": 0.7736951327907508, "compression_loss": 136.03460693359375, "distillation_loss": 7.279413223266602, "epoch": 1.46, "learning_rate": 3.95483637241879e-05, "loss": 142.204, "step": 1731, "task_loss": 3.3209292888641357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2675082755155174, "compression/movement_sparsity/importance_threshold": -0.0007073820187354762, "compression/movement_sparsity/linear_layer_sparsity": 0.8021084575545421, "compression/movement_sparsity/model_sparsity": 0.7745535644633054, "compression_loss": 136.15151977539062, "distillation_loss": 5.9051289558410645, "epoch": 1.46, "learning_rate": 3.954232580606207e-05, "loss": 141.9615, "step": 1732, "task_loss": 3.937056303024292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.268603211190273, "compression/movement_sparsity/importance_threshold": -0.0007040505547459015, "compression/movement_sparsity/linear_layer_sparsity": 0.8028473982229365, "compression/movement_sparsity/model_sparsity": 0.775267120246493, "compression_loss": 136.2680206298828, "distillation_loss": 6.020902633666992, "epoch": 1.46, "learning_rate": 3.9536287887936244e-05, "loss": 141.8759, "step": 1733, "task_loss": 2.859494209289551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2696947036739248, "compression/movement_sparsity/importance_threshold": -0.0007007295670495343, "compression/movement_sparsity/linear_layer_sparsity": 0.8036293612881612, "compression/movement_sparsity/model_sparsity": 0.7760222204748275, "compression_loss": 136.38421630859375, "distillation_loss": 6.9229888916015625, "epoch": 1.47, "learning_rate": 3.953024996981041e-05, "loss": 142.41, "step": 1734, "task_loss": 3.269270896911621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2707827583888207, "compression/movement_sparsity/importance_threshold": -0.0006974190391482744, "compression/movement_sparsity/linear_layer_sparsity": 0.8043100523976544, "compression/movement_sparsity/model_sparsity": 0.7766795277506588, "compression_loss": 136.49996948242188, "distillation_loss": 5.054283618927002, "epoch": 1.47, "learning_rate": 3.9524212051684585e-05, "loss": 141.71, "step": 1735, "task_loss": 1.6233290433883667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2718673807573095, "compression/movement_sparsity/importance_threshold": -0.0006941189545440229, "compression/movement_sparsity/linear_layer_sparsity": 0.8051874207281335, "compression/movement_sparsity/model_sparsity": 0.7775267557798853, "compression_loss": 136.61537170410156, "distillation_loss": 3.486783981323242, "epoch": 1.47, "learning_rate": 3.951817413355875e-05, "loss": 141.9053, "step": 1736, "task_loss": 2.760249614715576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2729485762017387, "compression/movement_sparsity/importance_threshold": -0.0006908292967386801, "compression/movement_sparsity/linear_layer_sparsity": 0.8060227086710258, "compression/movement_sparsity/model_sparsity": 0.7783333490122929, "compression_loss": 136.73033142089844, "distillation_loss": 6.09597110748291, "epoch": 1.47, "learning_rate": 3.951213621543292e-05, "loss": 142.905, "step": 1737, "task_loss": 2.4623823165893555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2740263501444573, "compression/movement_sparsity/importance_threshold": -0.0006875500492341453, "compression/movement_sparsity/linear_layer_sparsity": 0.8067632471778834, "compression/movement_sparsity/model_sparsity": 0.7790484477432771, "compression_loss": 136.8450469970703, "distillation_loss": 5.260236740112305, "epoch": 1.47, "learning_rate": 3.950609829730709e-05, "loss": 142.8653, "step": 1738, "task_loss": 3.8331313133239746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2751007080078125, "compression/movement_sparsity/importance_threshold": -0.0006842811955323214, "compression/movement_sparsity/linear_layer_sparsity": 0.8074705053328693, "compression/movement_sparsity/model_sparsity": 0.7797314094048586, "compression_loss": 136.95932006835938, "distillation_loss": 5.983112335205078, "epoch": 1.47, "learning_rate": 3.950006037918126e-05, "loss": 142.5842, "step": 1739, "task_loss": 3.2135629653930664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.276171655214153, "compression/movement_sparsity/importance_threshold": -0.0006810227191351072, "compression/movement_sparsity/linear_layer_sparsity": 0.8083913014818783, "compression/movement_sparsity/model_sparsity": 0.7806205733734489, "compression_loss": 137.07310485839844, "distillation_loss": 5.932247638702393, "epoch": 1.47, "learning_rate": 3.949402246105543e-05, "loss": 143.3072, "step": 1740, "task_loss": 2.5304110050201416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2772391971858275, "compression/movement_sparsity/importance_threshold": -0.0006777746035444027, "compression/movement_sparsity/linear_layer_sparsity": 0.8092920531050914, "compression/movement_sparsity/model_sparsity": 0.7814903814073685, "compression_loss": 137.1866912841797, "distillation_loss": 7.508889198303223, "epoch": 1.47, "learning_rate": 3.94879845429296e-05, "loss": 144.3809, "step": 1741, "task_loss": 4.748712539672852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2783033393451833, "compression/movement_sparsity/importance_threshold": -0.0006745368322621098, "compression/movement_sparsity/linear_layer_sparsity": 0.8098400640013004, "compression/movement_sparsity/model_sparsity": 0.782019566443414, "compression_loss": 137.29981994628906, "distillation_loss": 5.782407283782959, "epoch": 1.47, "learning_rate": 3.948194662480377e-05, "loss": 143.3534, "step": 1742, "task_loss": 4.705403804779053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2793640871145688, "compression/movement_sparsity/importance_threshold": -0.0006713093887901283, "compression/movement_sparsity/linear_layer_sparsity": 0.8103145862523695, "compression/movement_sparsity/model_sparsity": 0.7824777873953578, "compression_loss": 137.41268920898438, "distillation_loss": 9.290245056152344, "epoch": 1.47, "learning_rate": 3.9475908706677936e-05, "loss": 144.3383, "step": 1743, "task_loss": 4.8011155128479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2804214459163328, "compression/movement_sparsity/importance_threshold": -0.0006680922566303579, "compression/movement_sparsity/linear_layer_sparsity": 0.8111636823813841, "compression/movement_sparsity/model_sparsity": 0.7832977144602155, "compression_loss": 137.52517700195312, "distillation_loss": 8.803297996520996, "epoch": 1.47, "learning_rate": 3.946987078855211e-05, "loss": 144.1548, "step": 1744, "task_loss": 4.50725793838501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2814754211728228, "compression/movement_sparsity/importance_threshold": -0.0006648854192847008, "compression/movement_sparsity/linear_layer_sparsity": 0.8119442741673306, "compression/movement_sparsity/model_sparsity": 0.7840514905169335, "compression_loss": 137.63722229003906, "distillation_loss": 6.513503074645996, "epoch": 1.47, "learning_rate": 3.9463832870426284e-05, "loss": 143.5152, "step": 1745, "task_loss": 2.372196674346924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2825260183063873, "compression/movement_sparsity/importance_threshold": -0.0006616888602550552, "compression/movement_sparsity/linear_layer_sparsity": 0.8127045471716255, "compression/movement_sparsity/model_sparsity": 0.7847856458046576, "compression_loss": 137.74900817871094, "distillation_loss": 7.200915336608887, "epoch": 1.48, "learning_rate": 3.945779495230045e-05, "loss": 144.3316, "step": 1746, "task_loss": 3.886157751083374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2835732427393747, "compression/movement_sparsity/importance_threshold": -0.0006585025630433234, "compression/movement_sparsity/linear_layer_sparsity": 0.8133733379618182, "compression/movement_sparsity/model_sparsity": 0.785431461573766, "compression_loss": 137.8603973388672, "distillation_loss": 6.107273101806641, "epoch": 1.48, "learning_rate": 3.945175703417462e-05, "loss": 143.9328, "step": 1747, "task_loss": 2.3961503505706787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2846170998941326, "compression/movement_sparsity/importance_threshold": -0.0006553265111514054, "compression/movement_sparsity/linear_layer_sparsity": 0.8140284755800679, "compression/movement_sparsity/model_sparsity": 0.7860640931993896, "compression_loss": 137.9713592529297, "distillation_loss": 7.003299713134766, "epoch": 1.48, "learning_rate": 3.944571911604879e-05, "loss": 143.8237, "step": 1748, "task_loss": 3.2412619590759277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2856575951930098, "compression/movement_sparsity/importance_threshold": -0.0006521606880812011, "compression/movement_sparsity/linear_layer_sparsity": 0.8146782830953841, "compression/movement_sparsity/model_sparsity": 0.7866915778275132, "compression_loss": 138.08189392089844, "distillation_loss": 5.127871990203857, "epoch": 1.48, "learning_rate": 3.943968119792296e-05, "loss": 143.6361, "step": 1749, "task_loss": 2.6416983604431152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.286694734058354, "compression/movement_sparsity/importance_threshold": -0.000649005077334612, "compression/movement_sparsity/linear_layer_sparsity": 0.8154368509437072, "compression/movement_sparsity/model_sparsity": 0.7874240865366187, "compression_loss": 138.19223022460938, "distillation_loss": 5.6010870933532715, "epoch": 1.48, "learning_rate": 3.9433643279797126e-05, "loss": 144.204, "step": 1750, "task_loss": 2.7898385524749756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2877285219125139, "compression/movement_sparsity/importance_threshold": -0.0006458596624135384, "compression/movement_sparsity/linear_layer_sparsity": 0.8159866743133969, "compression/movement_sparsity/model_sparsity": 0.787955021782105, "compression_loss": 138.3020782470703, "distillation_loss": 7.651647567749023, "epoch": 1.48, "learning_rate": 3.94276053616713e-05, "loss": 143.6304, "step": 1751, "task_loss": 3.8118278980255127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2887589641778374, "compression/movement_sparsity/importance_threshold": -0.0006427244268198788, "compression/movement_sparsity/linear_layer_sparsity": 0.8167127011082417, "compression/movement_sparsity/model_sparsity": 0.7886561073230272, "compression_loss": 138.41151428222656, "distillation_loss": 5.834653854370117, "epoch": 1.48, "learning_rate": 3.942156744354547e-05, "loss": 144.2587, "step": 1752, "task_loss": 2.2168214321136475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2897860662766727, "compression/movement_sparsity/importance_threshold": -0.000639599354055536, "compression/movement_sparsity/linear_layer_sparsity": 0.8176160521999994, "compression/movement_sparsity/model_sparsity": 0.78952842552575, "compression_loss": 138.52056884765625, "distillation_loss": 4.8426713943481445, "epoch": 1.48, "learning_rate": 3.9415529525419635e-05, "loss": 144.8146, "step": 1753, "task_loss": 2.2209486961364746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2908098336313678, "compression/movement_sparsity/importance_threshold": -0.0006364844276224102, "compression/movement_sparsity/linear_layer_sparsity": 0.8184404772261753, "compression/movement_sparsity/model_sparsity": 0.7903245290160488, "compression_loss": 138.62942504882812, "distillation_loss": 6.882920742034912, "epoch": 1.48, "learning_rate": 3.940949160729381e-05, "loss": 144.5015, "step": 1754, "task_loss": 4.240734577178955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2918302716642716, "compression/movement_sparsity/importance_threshold": -0.0006333796310224, "compression/movement_sparsity/linear_layer_sparsity": 0.8190444482410995, "compression/movement_sparsity/model_sparsity": 0.790907751768578, "compression_loss": 138.73785400390625, "distillation_loss": 5.063625335693359, "epoch": 1.48, "learning_rate": 3.940345368916798e-05, "loss": 144.5788, "step": 1755, "task_loss": 2.6990959644317627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2928473857977316, "compression/movement_sparsity/importance_threshold": -0.0006302849477574082, "compression/movement_sparsity/linear_layer_sparsity": 0.8197788934982951, "compression/movement_sparsity/model_sparsity": 0.7916169665717712, "compression_loss": 138.84597778320312, "distillation_loss": 4.88409948348999, "epoch": 1.48, "learning_rate": 3.939741577104214e-05, "loss": 144.4637, "step": 1756, "task_loss": 3.1373655796051025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2938611814540961, "compression/movement_sparsity/importance_threshold": -0.0006272003613293341, "compression/movement_sparsity/linear_layer_sparsity": 0.8204202110062547, "compression/movement_sparsity/model_sparsity": 0.792236252850409, "compression_loss": 138.95367431640625, "distillation_loss": 7.489863395690918, "epoch": 1.48, "learning_rate": 3.939137785291632e-05, "loss": 145.4002, "step": 1757, "task_loss": 3.7274980545043945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2948716640557136, "compression/movement_sparsity/importance_threshold": -0.0006241258552400772, "compression/movement_sparsity/linear_layer_sparsity": 0.8210846137027574, "compression/movement_sparsity/model_sparsity": 0.792877831270345, "compression_loss": 139.06105041503906, "distillation_loss": 6.363085746765137, "epoch": 1.49, "learning_rate": 3.938533993479049e-05, "loss": 144.8903, "step": 1758, "task_loss": 2.6233699321746826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2958788390249318, "compression/movement_sparsity/importance_threshold": -0.0006210614129915403, "compression/movement_sparsity/linear_layer_sparsity": 0.8217275886700184, "compression/movement_sparsity/model_sparsity": 0.7934987180694582, "compression_loss": 139.16806030273438, "distillation_loss": 5.925960540771484, "epoch": 1.49, "learning_rate": 3.937930201666465e-05, "loss": 144.6537, "step": 1759, "task_loss": 2.42376971244812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2968827117840998, "compression/movement_sparsity/importance_threshold": -0.0006180070180856218, "compression/movement_sparsity/linear_layer_sparsity": 0.8223641961317618, "compression/movement_sparsity/model_sparsity": 0.7941134561064571, "compression_loss": 139.2747039794922, "distillation_loss": 6.429130554199219, "epoch": 1.49, "learning_rate": 3.9373264098538825e-05, "loss": 145.0042, "step": 1760, "task_loss": 2.613955020904541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2978832877555648, "compression/movement_sparsity/importance_threshold": -0.000614962654024223, "compression/movement_sparsity/linear_layer_sparsity": 0.8230937763285622, "compression/movement_sparsity/model_sparsity": 0.7948179729790461, "compression_loss": 139.38095092773438, "distillation_loss": 5.598249435424805, "epoch": 1.49, "learning_rate": 3.9367226180413e-05, "loss": 144.9486, "step": 1761, "task_loss": 3.1093711853027344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2988805723616752, "compression/movement_sparsity/importance_threshold": -0.0006119283043092448, "compression/movement_sparsity/linear_layer_sparsity": 0.824018113955359, "compression/movement_sparsity/model_sparsity": 0.7957105567647673, "compression_loss": 139.4868927001953, "distillation_loss": 5.053100109100342, "epoch": 1.49, "learning_rate": 3.9361188262287166e-05, "loss": 145.444, "step": 1762, "task_loss": 2.607212543487549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2998745710247797, "compression/movement_sparsity/importance_threshold": -0.0006089039524425867, "compression/movement_sparsity/linear_layer_sparsity": 0.8246679453190106, "compression/movement_sparsity/model_sparsity": 0.7963380644219624, "compression_loss": 139.5924072265625, "distillation_loss": 7.497715950012207, "epoch": 1.49, "learning_rate": 3.9355150344161334e-05, "loss": 145.3521, "step": 1763, "task_loss": 3.1383824348449707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.300865289167226, "compression/movement_sparsity/importance_threshold": -0.0006058895819261506, "compression/movement_sparsity/linear_layer_sparsity": 0.8254046203955542, "compression/movement_sparsity/model_sparsity": 0.7970494324433491, "compression_loss": 139.6975555419922, "distillation_loss": 8.2505521774292, "epoch": 1.49, "learning_rate": 3.934911242603551e-05, "loss": 146.1034, "step": 1764, "task_loss": 3.020756244659424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3018527322113624, "compression/movement_sparsity/importance_threshold": -0.000602885176261835, "compression/movement_sparsity/linear_layer_sparsity": 0.8261942864730716, "compression/movement_sparsity/model_sparsity": 0.7978119710618068, "compression_loss": 139.80259704589844, "distillation_loss": 3.945004940032959, "epoch": 1.49, "learning_rate": 3.9343074507909675e-05, "loss": 144.9918, "step": 1765, "task_loss": 1.8654829263687134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3028369055795375, "compression/movement_sparsity/importance_threshold": -0.000599890718951542, "compression/movement_sparsity/linear_layer_sparsity": 0.8267308501683501, "compression/movement_sparsity/model_sparsity": 0.7983301021434895, "compression_loss": 139.90704345703125, "distillation_loss": 6.858841419219971, "epoch": 1.49, "learning_rate": 3.933703658978384e-05, "loss": 146.1691, "step": 1766, "task_loss": 2.8752636909484863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.303817814694099, "compression/movement_sparsity/importance_threshold": -0.0005969061934971709, "compression/movement_sparsity/linear_layer_sparsity": 0.8274216291236633, "compression/movement_sparsity/model_sparsity": 0.7989971507166032, "compression_loss": 140.01119995117188, "distillation_loss": 5.228715896606445, "epoch": 1.49, "learning_rate": 3.9330998671658016e-05, "loss": 145.1536, "step": 1767, "task_loss": 2.606445789337158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3047954649773952, "compression/movement_sparsity/importance_threshold": -0.0005939315834006227, "compression/movement_sparsity/linear_layer_sparsity": 0.8281291973070078, "compression/movement_sparsity/model_sparsity": 0.7996804117561154, "compression_loss": 140.1150360107422, "distillation_loss": 7.838719367980957, "epoch": 1.49, "learning_rate": 3.932496075353218e-05, "loss": 146.51, "step": 1768, "task_loss": 4.325048923492432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3057698618517743, "compression/movement_sparsity/importance_threshold": -0.0005909668721637978, "compression/movement_sparsity/linear_layer_sparsity": 0.8287999794331956, "compression/movement_sparsity/model_sparsity": 0.8003281504527014, "compression_loss": 140.2186279296875, "distillation_loss": 6.408384323120117, "epoch": 1.5, "learning_rate": 3.931892283540635e-05, "loss": 145.9463, "step": 1769, "task_loss": 3.1985602378845215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3067410107395847, "compression/movement_sparsity/importance_threshold": -0.0005880120432885973, "compression/movement_sparsity/linear_layer_sparsity": 0.8294655864706295, "compression/movement_sparsity/model_sparsity": 0.8009708918407528, "compression_loss": 140.32174682617188, "distillation_loss": 5.4939045906066895, "epoch": 1.5, "learning_rate": 3.9312884917280524e-05, "loss": 147.2387, "step": 1770, "task_loss": 3.274066209793091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3077089170631744, "compression/movement_sparsity/importance_threshold": -0.0005850670802769204, "compression/movement_sparsity/linear_layer_sparsity": 0.8301473269068748, "compression/movement_sparsity/model_sparsity": 0.801629212395734, "compression_loss": 140.4244842529297, "distillation_loss": 4.6569342613220215, "epoch": 1.5, "learning_rate": 3.93068469991547e-05, "loss": 145.769, "step": 1771, "task_loss": 1.4715452194213867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3086735862448915, "compression/movement_sparsity/importance_threshold": -0.0005821319666306692, "compression/movement_sparsity/linear_layer_sparsity": 0.8307435949095061, "compression/movement_sparsity/model_sparsity": 0.80220499675814, "compression_loss": 140.5270233154297, "distillation_loss": 5.71185827255249, "epoch": 1.5, "learning_rate": 3.930080908102886e-05, "loss": 145.9759, "step": 1772, "task_loss": 3.63094425201416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3096350237070846, "compression/movement_sparsity/importance_threshold": -0.0005792066858517422, "compression/movement_sparsity/linear_layer_sparsity": 0.8313881557910626, "compression/movement_sparsity/model_sparsity": 0.8028274149905139, "compression_loss": 140.6290740966797, "distillation_loss": 4.518129348754883, "epoch": 1.5, "learning_rate": 3.929477116290303e-05, "loss": 145.7943, "step": 1773, "task_loss": 1.7505050897598267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3105932348721014, "compression/movement_sparsity/importance_threshold": -0.0005762912214420414, "compression/movement_sparsity/linear_layer_sparsity": 0.831980166865848, "compression/movement_sparsity/model_sparsity": 0.8033990886636413, "compression_loss": 140.73081970214844, "distillation_loss": 5.6234517097473145, "epoch": 1.5, "learning_rate": 3.9288733244777206e-05, "loss": 147.0208, "step": 1774, "task_loss": 4.130677223205566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3115482251622907, "compression/movement_sparsity/importance_threshold": -0.0005733855569034661, "compression/movement_sparsity/linear_layer_sparsity": 0.8325881682494329, "compression/movement_sparsity/model_sparsity": 0.8039862033292692, "compression_loss": 140.8321533203125, "distillation_loss": 7.025821685791016, "epoch": 1.5, "learning_rate": 3.928269532665137e-05, "loss": 146.7151, "step": 1775, "task_loss": 3.4836831092834473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3125, "compression/movement_sparsity/importance_threshold": -0.0005704896757379174, "compression/movement_sparsity/linear_layer_sparsity": 0.833281999791661, "compression/movement_sparsity/model_sparsity": 0.8046561996235461, "compression_loss": 140.93325805664062, "distillation_loss": 5.409782409667969, "epoch": 1.5, "learning_rate": 3.927665740852554e-05, "loss": 147.1021, "step": 1776, "task_loss": 1.9679040908813477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.313448564807578, "compression/movement_sparsity/importance_threshold": -0.0005676035614472956, "compression/movement_sparsity/linear_layer_sparsity": 0.8339593759825514, "compression/movement_sparsity/model_sparsity": 0.8053103058584266, "compression_loss": 141.03379821777344, "distillation_loss": 7.2750244140625, "epoch": 1.5, "learning_rate": 3.9270619490399715e-05, "loss": 147.2537, "step": 1777, "task_loss": 3.176138162612915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3143939250073724, "compression/movement_sparsity/importance_threshold": -0.0005647271975335027, "compression/movement_sparsity/linear_layer_sparsity": 0.834428615827358, "compression/movement_sparsity/model_sparsity": 0.8057634258710134, "compression_loss": 141.13417053222656, "distillation_loss": 6.577167510986328, "epoch": 1.5, "learning_rate": 3.926458157227388e-05, "loss": 146.2798, "step": 1778, "task_loss": 3.0580403804779053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.315336086021732, "compression/movement_sparsity/importance_threshold": -0.0005618605674984362, "compression/movement_sparsity/linear_layer_sparsity": 0.8349578819320433, "compression/movement_sparsity/model_sparsity": 0.8062745100567899, "compression_loss": 141.2341766357422, "distillation_loss": 4.356266975402832, "epoch": 1.5, "learning_rate": 3.925854365414805e-05, "loss": 146.4973, "step": 1779, "task_loss": 2.0127203464508057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3162750532730045, "compression/movement_sparsity/importance_threshold": -0.0005590036548439991, "compression/movement_sparsity/linear_layer_sparsity": 0.8354120734772933, "compression/movement_sparsity/model_sparsity": 0.8067130987252039, "compression_loss": 141.33375549316406, "distillation_loss": 4.648265838623047, "epoch": 1.5, "learning_rate": 3.925250573602222e-05, "loss": 147.0363, "step": 1780, "task_loss": 3.4716920852661133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3172108321835385, "compression/movement_sparsity/importance_threshold": -0.0005561564430720898, "compression/movement_sparsity/linear_layer_sparsity": 0.8358745046223798, "compression/movement_sparsity/model_sparsity": 0.807159643937852, "compression_loss": 141.43310546875, "distillation_loss": 7.4663848876953125, "epoch": 1.51, "learning_rate": 3.924646781789639e-05, "loss": 147.5221, "step": 1781, "task_loss": 3.9189014434814453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3181434281756819, "compression/movement_sparsity/importance_threshold": -0.0005533189156846112, "compression/movement_sparsity/linear_layer_sparsity": 0.8364867271613079, "compression/movement_sparsity/model_sparsity": 0.8077508347491511, "compression_loss": 141.53187561035156, "distillation_loss": 6.658722877502441, "epoch": 1.51, "learning_rate": 3.924042989977056e-05, "loss": 148.423, "step": 1782, "task_loss": 3.3712058067321777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.319072846671783, "compression/movement_sparsity/importance_threshold": -0.0005504910561834617, "compression/movement_sparsity/linear_layer_sparsity": 0.8370228854348868, "compression/movement_sparsity/model_sparsity": 0.8082685743366168, "compression_loss": 141.6305389404297, "distillation_loss": 7.865726947784424, "epoch": 1.51, "learning_rate": 3.923439198164473e-05, "loss": 148.1572, "step": 1783, "task_loss": 3.6238651275634766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3199990930941898, "compression/movement_sparsity/importance_threshold": -0.0005476728480705425, "compression/movement_sparsity/linear_layer_sparsity": 0.8376943114661269, "compression/movement_sparsity/model_sparsity": 0.8089169348181358, "compression_loss": 141.7288055419922, "distillation_loss": 6.46535587310791, "epoch": 1.51, "learning_rate": 3.92283540635189e-05, "loss": 147.4594, "step": 1784, "task_loss": 3.276215076446533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.320922172865251, "compression/movement_sparsity/importance_threshold": -0.0005448642748477539, "compression/movement_sparsity/linear_layer_sparsity": 0.838362434502932, "compression/movement_sparsity/model_sparsity": 0.8095621057732397, "compression_loss": 141.8267822265625, "distillation_loss": 6.641994953155518, "epoch": 1.51, "learning_rate": 3.9222316145393066e-05, "loss": 147.7042, "step": 1785, "task_loss": 2.716071844100952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3218420914073143, "compression/movement_sparsity/importance_threshold": -0.0005420653200169968, "compression/movement_sparsity/linear_layer_sparsity": 0.8388262250031289, "compression/movement_sparsity/model_sparsity": 0.8100099636429684, "compression_loss": 141.9244384765625, "distillation_loss": 6.2577104568481445, "epoch": 1.51, "learning_rate": 3.921627822726724e-05, "loss": 147.8868, "step": 1786, "task_loss": 4.215167999267578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.322758854142728, "compression/movement_sparsity/importance_threshold": -0.0005392759670801708, "compression/movement_sparsity/linear_layer_sparsity": 0.8393582932872088, "compression/movement_sparsity/model_sparsity": 0.8105237537446566, "compression_loss": 142.0216827392578, "distillation_loss": 5.036468505859375, "epoch": 1.51, "learning_rate": 3.9210240309141414e-05, "loss": 148.3714, "step": 1787, "task_loss": 2.350123405456543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3236724664938404, "compression/movement_sparsity/importance_threshold": -0.0005364961995391777, "compression/movement_sparsity/linear_layer_sparsity": 0.8397848088393759, "compression/movement_sparsity/model_sparsity": 0.8109356171754916, "compression_loss": 142.11856079101562, "distillation_loss": 4.747697830200195, "epoch": 1.51, "learning_rate": 3.920420239101558e-05, "loss": 147.5398, "step": 1788, "task_loss": 3.692176342010498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3245829338829997, "compression/movement_sparsity/importance_threshold": -0.0005337260008959161, "compression/movement_sparsity/linear_layer_sparsity": 0.8403420728896703, "compression/movement_sparsity/model_sparsity": 0.8114737374913136, "compression_loss": 142.21507263183594, "distillation_loss": 6.544585704803467, "epoch": 1.51, "learning_rate": 3.919816447288975e-05, "loss": 148.1249, "step": 1789, "task_loss": 3.345541477203369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3254902617325541, "compression/movement_sparsity/importance_threshold": -0.000530965354652288, "compression/movement_sparsity/linear_layer_sparsity": 0.8408676901990592, "compression/movement_sparsity/model_sparsity": 0.8119812982291369, "compression_loss": 142.31141662597656, "distillation_loss": 7.286420822143555, "epoch": 1.51, "learning_rate": 3.919212655476392e-05, "loss": 148.3581, "step": 1790, "task_loss": 2.778806447982788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3263944554648517, "compression/movement_sparsity/importance_threshold": -0.0005282142443101935, "compression/movement_sparsity/linear_layer_sparsity": 0.8413476975672408, "compression/movement_sparsity/model_sparsity": 0.8124448158675461, "compression_loss": 142.40724182128906, "distillation_loss": 4.967457294464111, "epoch": 1.51, "learning_rate": 3.918608863663809e-05, "loss": 147.358, "step": 1791, "task_loss": 2.412517786026001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3272955205022408, "compression/movement_sparsity/importance_threshold": -0.0005254726533715339, "compression/movement_sparsity/linear_layer_sparsity": 0.841845996608576, "compression/movement_sparsity/model_sparsity": 0.8129259968038642, "compression_loss": 142.5028839111328, "distillation_loss": 4.927112579345703, "epoch": 1.51, "learning_rate": 3.9180050718512256e-05, "loss": 147.8677, "step": 1792, "task_loss": 2.2537665367126465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3281934622670695, "compression/movement_sparsity/importance_threshold": -0.0005227405653382075, "compression/movement_sparsity/linear_layer_sparsity": 0.8424267989657835, "compression/movement_sparsity/model_sparsity": 0.8134868468133447, "compression_loss": 142.5980682373047, "distillation_loss": 5.980989456176758, "epoch": 1.52, "learning_rate": 3.917401280038643e-05, "loss": 149.0166, "step": 1793, "task_loss": 2.145894765853882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.329088286181686, "compression/movement_sparsity/importance_threshold": -0.0005200179637121155, "compression/movement_sparsity/linear_layer_sparsity": 0.8431107096005385, "compression/movement_sparsity/model_sparsity": 0.8141472630138407, "compression_loss": 142.69297790527344, "distillation_loss": 4.006131172180176, "epoch": 1.52, "learning_rate": 3.91679748822606e-05, "loss": 147.8136, "step": 1794, "task_loss": 1.4242582321166992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3299799976684386, "compression/movement_sparsity/importance_threshold": -0.00051730483199516, "compression/movement_sparsity/linear_layer_sparsity": 0.843798626755619, "compression/movement_sparsity/model_sparsity": 0.8148115480983634, "compression_loss": 142.78756713867188, "distillation_loss": 6.70750617980957, "epoch": 1.52, "learning_rate": 3.9161936964134765e-05, "loss": 149.304, "step": 1795, "task_loss": 3.9344842433929443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3308686021496752, "compression/movement_sparsity/importance_threshold": -0.0005146011536892401, "compression/movement_sparsity/linear_layer_sparsity": 0.8443933446164578, "compression/movement_sparsity/model_sparsity": 0.8153858355711162, "compression_loss": 142.88172912597656, "distillation_loss": 5.885377883911133, "epoch": 1.52, "learning_rate": 3.915589904600894e-05, "loss": 148.9896, "step": 1796, "task_loss": 3.4574124813079834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3317541050477444, "compression/movement_sparsity/importance_threshold": -0.0005119069122962562, "compression/movement_sparsity/linear_layer_sparsity": 0.8451308066880653, "compression/movement_sparsity/model_sparsity": 0.8160979635518654, "compression_loss": 142.9755401611328, "distillation_loss": 5.861017227172852, "epoch": 1.52, "learning_rate": 3.9149861127883106e-05, "loss": 148.3139, "step": 1797, "task_loss": 2.513068914413452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3326365117849943, "compression/movement_sparsity/importance_threshold": -0.0005092220913181086, "compression/movement_sparsity/linear_layer_sparsity": 0.8458398176956937, "compression/movement_sparsity/model_sparsity": 0.8167826178502088, "compression_loss": 143.06912231445312, "distillation_loss": 4.897464752197266, "epoch": 1.52, "learning_rate": 3.914382320975728e-05, "loss": 148.4563, "step": 1798, "task_loss": 3.0545711517333984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3335158277837729, "compression/movement_sparsity/importance_threshold": -0.0005065466742566992, "compression/movement_sparsity/linear_layer_sparsity": 0.8463216613856913, "compression/movement_sparsity/model_sparsity": 0.8172479087271304, "compression_loss": 143.16224670410156, "distillation_loss": 5.793306827545166, "epoch": 1.52, "learning_rate": 3.913778529163145e-05, "loss": 148.3859, "step": 1799, "task_loss": 3.607916831970215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3343920584664284, "compression/movement_sparsity/importance_threshold": -0.0005038806446139264, "compression/movement_sparsity/linear_layer_sparsity": 0.8469339316212899, "compression/movement_sparsity/model_sparsity": 0.8178391455965727, "compression_loss": 143.25506591796875, "distillation_loss": 8.277213096618652, "epoch": 1.52, "learning_rate": 3.9131747373505614e-05, "loss": 149.3106, "step": 1800, "task_loss": 3.839252233505249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3352652092553092, "compression/movement_sparsity/importance_threshold": -0.0005012239858916924, "compression/movement_sparsity/linear_layer_sparsity": 0.8476092091586765, "compression/movement_sparsity/model_sparsity": 0.8184912252731533, "compression_loss": 143.3475799560547, "distillation_loss": 7.331027984619141, "epoch": 1.52, "learning_rate": 3.912570945537979e-05, "loss": 149.7581, "step": 1801, "task_loss": 3.7918877601623535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3361352855727633, "compression/movement_sparsity/importance_threshold": -0.0004985766815918972, "compression/movement_sparsity/linear_layer_sparsity": 0.8482062641563718, "compression/movement_sparsity/model_sparsity": 0.8190677695949218, "compression_loss": 143.43975830078125, "distillation_loss": 6.094710826873779, "epoch": 1.52, "learning_rate": 3.9119671537253955e-05, "loss": 149.996, "step": 1802, "task_loss": 3.4455535411834717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3370022928411394, "compression/movement_sparsity/importance_threshold": -0.0004959387152164403, "compression/movement_sparsity/linear_layer_sparsity": 0.8489092296110089, "compression/movement_sparsity/model_sparsity": 0.8197465860236173, "compression_loss": 143.5316162109375, "distillation_loss": 4.777237892150879, "epoch": 1.52, "learning_rate": 3.911363361912813e-05, "loss": 150.0481, "step": 1803, "task_loss": 3.3300676345825195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.337866236482785, "compression/movement_sparsity/importance_threshold": -0.0004933100702672229, "compression/movement_sparsity/linear_layer_sparsity": 0.8495149773269107, "compression/movement_sparsity/model_sparsity": 0.82033152444198, "compression_loss": 143.6231689453125, "distillation_loss": 7.864965438842773, "epoch": 1.52, "learning_rate": 3.9107595701002296e-05, "loss": 150.5879, "step": 1804, "task_loss": 3.260775089263916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3387271219200483, "compression/movement_sparsity/importance_threshold": -0.0004906907302461459, "compression/movement_sparsity/linear_layer_sparsity": 0.8502480513048283, "compression/movement_sparsity/model_sparsity": 0.8210394150735567, "compression_loss": 143.71438598632812, "distillation_loss": 7.671809673309326, "epoch": 1.53, "learning_rate": 3.9101557782876464e-05, "loss": 150.1447, "step": 1805, "task_loss": 4.524222373962402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.339584954575278, "compression/movement_sparsity/importance_threshold": -0.0004880806786551088, "compression/movement_sparsity/linear_layer_sparsity": 0.8507274505404605, "compression/movement_sparsity/model_sparsity": 0.8215023454706405, "compression_loss": 143.80523681640625, "distillation_loss": 6.320964813232422, "epoch": 1.53, "learning_rate": 3.909551986475064e-05, "loss": 149.961, "step": 1806, "task_loss": 3.266688346862793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3404397398708223, "compression/movement_sparsity/importance_threshold": -0.00048547989899601265, "compression/movement_sparsity/linear_layer_sparsity": 0.8511533937325811, "compression/movement_sparsity/model_sparsity": 0.8219136562037572, "compression_loss": 143.89590454101562, "distillation_loss": 7.13585090637207, "epoch": 1.53, "learning_rate": 3.9089481946624805e-05, "loss": 150.1457, "step": 1807, "task_loss": 4.044832229614258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3412914832290288, "compression/movement_sparsity/importance_threshold": -0.0004828883747707586, "compression/movement_sparsity/linear_layer_sparsity": 0.8515032249626822, "compression/movement_sparsity/model_sparsity": 0.8222514696548975, "compression_loss": 143.98617553710938, "distillation_loss": 7.288115501403809, "epoch": 1.53, "learning_rate": 3.908344402849898e-05, "loss": 150.3703, "step": 1808, "task_loss": 3.8762247562408447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3421401900722463, "compression/movement_sparsity/importance_threshold": -0.0004803060894812451, "compression/movement_sparsity/linear_layer_sparsity": 0.8520252888701155, "compression/movement_sparsity/model_sparsity": 0.8227555990610541, "compression_loss": 144.07611083984375, "distillation_loss": 6.305458068847656, "epoch": 1.53, "learning_rate": 3.9077406110373146e-05, "loss": 150.6307, "step": 1809, "task_loss": 3.966034412384033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3429858658228226, "compression/movement_sparsity/importance_threshold": -0.00047773302662937507, "compression/movement_sparsity/linear_layer_sparsity": 0.8526517369410331, "compression/movement_sparsity/model_sparsity": 0.8233605267135561, "compression_loss": 144.16567993164062, "distillation_loss": 7.580005645751953, "epoch": 1.53, "learning_rate": 3.907136819224731e-05, "loss": 150.6131, "step": 1810, "task_loss": 3.3612935543060303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3438285159031063, "compression/movement_sparsity/importance_threshold": -0.00047516916971704695, "compression/movement_sparsity/linear_layer_sparsity": 0.8531029951410448, "compression/movement_sparsity/model_sparsity": 0.8237962828061647, "compression_loss": 144.25503540039062, "distillation_loss": 6.881827354431152, "epoch": 1.53, "learning_rate": 3.906533027412149e-05, "loss": 151.228, "step": 1811, "task_loss": 3.7318949699401855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.344668145735445, "compression/movement_sparsity/importance_threshold": -0.00047261450224616187, "compression/movement_sparsity/linear_layer_sparsity": 0.8536398688646819, "compression/movement_sparsity/model_sparsity": 0.8243147132657781, "compression_loss": 144.34396362304688, "distillation_loss": 6.9348602294921875, "epoch": 1.53, "learning_rate": 3.9059292355995654e-05, "loss": 150.595, "step": 1812, "task_loss": 3.9608113765716553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3455047607421875, "compression/movement_sparsity/importance_threshold": -0.0004700690077186209, "compression/movement_sparsity/linear_layer_sparsity": 0.8543050704804162, "compression/movement_sparsity/model_sparsity": 0.8249570631596124, "compression_loss": 144.43276977539062, "distillation_loss": 5.598838806152344, "epoch": 1.53, "learning_rate": 3.905325443786982e-05, "loss": 149.9999, "step": 1813, "task_loss": 1.7702279090881348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3463383663456816, "compression/movement_sparsity/importance_threshold": -0.0004675326696363235, "compression/movement_sparsity/linear_layer_sparsity": 0.8547549693253172, "compression/movement_sparsity/model_sparsity": 0.8253915065951404, "compression_loss": 144.5210723876953, "distillation_loss": 6.831597805023193, "epoch": 1.53, "learning_rate": 3.9047216519743995e-05, "loss": 150.4648, "step": 1814, "task_loss": 3.3680505752563477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3471689679682757, "compression/movement_sparsity/importance_threshold": -0.00046500547150117155, "compression/movement_sparsity/linear_layer_sparsity": 0.8554045979781191, "compression/movement_sparsity/model_sparsity": 0.826018818505227, "compression_loss": 144.60910034179688, "distillation_loss": 6.159719467163086, "epoch": 1.53, "learning_rate": 3.904117860161816e-05, "loss": 151.1119, "step": 1815, "task_loss": 3.7059710025787354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.347996571032318, "compression/movement_sparsity/importance_threshold": -0.0004624873968150636, "compression/movement_sparsity/linear_layer_sparsity": 0.8558579906041375, "compression/movement_sparsity/model_sparsity": 0.8264566356997428, "compression_loss": 144.6968536376953, "distillation_loss": 6.644157409667969, "epoch": 1.53, "learning_rate": 3.903514068349233e-05, "loss": 151.1702, "step": 1816, "task_loss": 3.3798046112060547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3488211809601565, "compression/movement_sparsity/importance_threshold": -0.00045997842907990246, "compression/movement_sparsity/linear_layer_sparsity": 0.8563611666300356, "compression/movement_sparsity/model_sparsity": 0.8269425260812008, "compression_loss": 144.78424072265625, "distillation_loss": 6.457886695861816, "epoch": 1.54, "learning_rate": 3.9029102765366504e-05, "loss": 151.1055, "step": 1817, "task_loss": 3.5257694721221924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3496428031741394, "compression/movement_sparsity/importance_threshold": -0.0004574785517975858, "compression/movement_sparsity/linear_layer_sparsity": 0.8567924041525866, "compression/movement_sparsity/model_sparsity": 0.8273589492682104, "compression_loss": 144.8712615966797, "distillation_loss": 8.108641624450684, "epoch": 1.54, "learning_rate": 3.902306484724068e-05, "loss": 151.6141, "step": 1818, "task_loss": 3.3345062732696533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3504614430966153, "compression/movement_sparsity/importance_threshold": -0.00045498774847001645, "compression/movement_sparsity/linear_layer_sparsity": 0.8573195119829299, "compression/movement_sparsity/model_sparsity": 0.827867949323008, "compression_loss": 144.9580841064453, "distillation_loss": 6.7251667976379395, "epoch": 1.54, "learning_rate": 3.9017026929114845e-05, "loss": 151.8117, "step": 1819, "task_loss": 2.868752956390381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3512771061499318, "compression/movement_sparsity/importance_threshold": -0.00045250600259909377, "compression/movement_sparsity/linear_layer_sparsity": 0.8578487065426096, "compression/movement_sparsity/model_sparsity": 0.8283789644215698, "compression_loss": 145.04441833496094, "distillation_loss": 6.845142364501953, "epoch": 1.54, "learning_rate": 3.901098901098901e-05, "loss": 151.5003, "step": 1820, "task_loss": 3.3127574920654297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3520897977564377, "compression/movement_sparsity/importance_threshold": -0.000450033297686718, "compression/movement_sparsity/linear_layer_sparsity": 0.8583046270921667, "compression/movement_sparsity/model_sparsity": 0.828819222697674, "compression_loss": 145.13047790527344, "distillation_loss": 7.886384963989258, "epoch": 1.54, "learning_rate": 3.9004951092863186e-05, "loss": 151.9832, "step": 1821, "task_loss": 3.1895766258239746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3528995233384806, "compression/movement_sparsity/importance_threshold": -0.00044756961723479115, "compression/movement_sparsity/linear_layer_sparsity": 0.858686248153185, "compression/movement_sparsity/model_sparsity": 0.8291877339012426, "compression_loss": 145.2162322998047, "distillation_loss": 5.889693260192871, "epoch": 1.54, "learning_rate": 3.899891317473735e-05, "loss": 151.2134, "step": 1822, "task_loss": 3.0155718326568604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3537062883184092, "compression/movement_sparsity/importance_threshold": -0.0004451149447452108, "compression/movement_sparsity/linear_layer_sparsity": 0.8593464058460092, "compression/movement_sparsity/model_sparsity": 0.8298252131464356, "compression_loss": 145.30152893066406, "distillation_loss": 7.186237335205078, "epoch": 1.54, "learning_rate": 3.899287525661152e-05, "loss": 152.0894, "step": 1823, "task_loss": 2.638792037963867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3545100981185716, "compression/movement_sparsity/importance_threshold": -0.00044266926371987984, "compression/movement_sparsity/linear_layer_sparsity": 0.8597539381233001, "compression/movement_sparsity/model_sparsity": 0.8302187454362857, "compression_loss": 145.3865966796875, "distillation_loss": 7.422829627990723, "epoch": 1.54, "learning_rate": 3.8986837338485694e-05, "loss": 152.3152, "step": 1824, "task_loss": 3.345848321914673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3553109581613154, "compression/movement_sparsity/importance_threshold": -0.00044023255766069934, "compression/movement_sparsity/linear_layer_sparsity": 0.8601871789060139, "compression/movement_sparsity/model_sparsity": 0.8306371030653088, "compression_loss": 145.4713592529297, "distillation_loss": 5.802145957946777, "epoch": 1.54, "learning_rate": 3.898079942035986e-05, "loss": 151.5976, "step": 1825, "task_loss": 2.584536075592041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.35610887386899, "compression/movement_sparsity/importance_threshold": -0.00043780481006956695, "compression/movement_sparsity/linear_layer_sparsity": 0.8606166516517548, "compression/movement_sparsity/model_sparsity": 0.8310518221010207, "compression_loss": 145.55572509765625, "distillation_loss": 6.697346210479736, "epoch": 1.54, "learning_rate": 3.897476150223403e-05, "loss": 151.6336, "step": 1826, "task_loss": 3.2098422050476074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3569038506639421, "compression/movement_sparsity/importance_threshold": -0.0004353860044483855, "compression/movement_sparsity/linear_layer_sparsity": 0.8610834470421959, "compression/movement_sparsity/model_sparsity": 0.8315025816337697, "compression_loss": 145.6398162841797, "distillation_loss": 6.716588973999023, "epoch": 1.54, "learning_rate": 3.89687235841082e-05, "loss": 152.0652, "step": 1827, "task_loss": 2.6326754093170166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.357695893968521, "compression/movement_sparsity/importance_threshold": -0.00043297612429905354, "compression/movement_sparsity/linear_layer_sparsity": 0.8616363826196385, "compression/movement_sparsity/model_sparsity": 0.8320365221730983, "compression_loss": 145.7235565185547, "distillation_loss": 7.821836948394775, "epoch": 1.54, "learning_rate": 3.8962685665982377e-05, "loss": 152.6999, "step": 1828, "task_loss": 3.8367185592651367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3584850092050746, "compression/movement_sparsity/importance_threshold": -0.00043057515312347385, "compression/movement_sparsity/linear_layer_sparsity": 0.8621652671509595, "compression/movement_sparsity/model_sparsity": 0.8325472378937293, "compression_loss": 145.80709838867188, "distillation_loss": 6.658909320831299, "epoch": 1.55, "learning_rate": 3.895664774785654e-05, "loss": 152.3078, "step": 1829, "task_loss": 2.985086679458618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3592712017959512, "compression/movement_sparsity/importance_threshold": -0.00042818307442354497, "compression/movement_sparsity/linear_layer_sparsity": 0.8625916396131151, "compression/movement_sparsity/model_sparsity": 0.8329589631501347, "compression_loss": 145.89027404785156, "distillation_loss": 5.07117223739624, "epoch": 1.55, "learning_rate": 3.895060982973071e-05, "loss": 152.3992, "step": 1830, "task_loss": 3.29555606842041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3600544771634988, "compression/movement_sparsity/importance_threshold": -0.000425799871701168, "compression/movement_sparsity/linear_layer_sparsity": 0.8630750095965701, "compression/movement_sparsity/model_sparsity": 0.833425727887638, "compression_loss": 145.9732208251953, "distillation_loss": 5.778958320617676, "epoch": 1.55, "learning_rate": 3.8944571911604885e-05, "loss": 152.3016, "step": 1831, "task_loss": 3.1990954875946045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3608348407300652, "compression/movement_sparsity/importance_threshold": -0.00042342552845824315, "compression/movement_sparsity/linear_layer_sparsity": 0.8634221936614559, "compression/movement_sparsity/model_sparsity": 0.8337609851118318, "compression_loss": 146.05580139160156, "distillation_loss": 7.808193683624268, "epoch": 1.55, "learning_rate": 3.8938533993479045e-05, "loss": 152.9909, "step": 1832, "task_loss": 4.440708160400391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3616122979179996, "compression/movement_sparsity/importance_threshold": -0.00042106002819667157, "compression/movement_sparsity/linear_layer_sparsity": 0.8637997128088073, "compression/movement_sparsity/model_sparsity": 0.834125535315087, "compression_loss": 146.13807678222656, "distillation_loss": 5.318569183349609, "epoch": 1.55, "learning_rate": 3.893249607535322e-05, "loss": 152.3235, "step": 1833, "task_loss": 2.386603593826294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3623868541496493, "compression/movement_sparsity/importance_threshold": -0.0004187033544183535, "compression/movement_sparsity/linear_layer_sparsity": 0.8642485742511241, "compression/movement_sparsity/model_sparsity": 0.8345589769860009, "compression_loss": 146.22006225585938, "distillation_loss": 6.53436279296875, "epoch": 1.55, "learning_rate": 3.892645815722739e-05, "loss": 152.1619, "step": 1834, "task_loss": 3.377082586288452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3631585148473628, "compression/movement_sparsity/importance_threshold": -0.0004163554906251891, "compression/movement_sparsity/linear_layer_sparsity": 0.864793115214551, "compression/movement_sparsity/model_sparsity": 0.8350848112921302, "compression_loss": 146.3018035888672, "distillation_loss": 6.998291969299316, "epoch": 1.55, "learning_rate": 3.892042023910156e-05, "loss": 152.5884, "step": 1835, "task_loss": 3.403266191482544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3639272854334887, "compression/movement_sparsity/importance_threshold": -0.0004140164203190787, "compression/movement_sparsity/linear_layer_sparsity": 0.865275733975445, "compression/movement_sparsity/model_sparsity": 0.8355508506138785, "compression_loss": 146.38316345214844, "distillation_loss": 7.559309959411621, "epoch": 1.55, "learning_rate": 3.891438232097573e-05, "loss": 153.0688, "step": 1836, "task_loss": 3.664708137512207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3646931713303745, "compression/movement_sparsity/importance_threshold": -0.0004116861270019234, "compression/movement_sparsity/linear_layer_sparsity": 0.8656975991022342, "compression/movement_sparsity/model_sparsity": 0.8359582233757535, "compression_loss": 146.46429443359375, "distillation_loss": 7.0397539138793945, "epoch": 1.55, "learning_rate": 3.89083444028499e-05, "loss": 152.5111, "step": 1837, "task_loss": 2.7290971279144287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3654561779603689, "compression/movement_sparsity/importance_threshold": -0.0004093645941756225, "compression/movement_sparsity/linear_layer_sparsity": 0.8660820223426468, "compression/movement_sparsity/model_sparsity": 0.8363294404952337, "compression_loss": 146.54501342773438, "distillation_loss": 7.92360782623291, "epoch": 1.55, "learning_rate": 3.890230648472407e-05, "loss": 153.3794, "step": 1838, "task_loss": 4.660275459289551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3662163107458198, "compression/movement_sparsity/importance_threshold": -0.00040705180534207804, "compression/movement_sparsity/linear_layer_sparsity": 0.8664540206003828, "compression/movement_sparsity/model_sparsity": 0.836688659468416, "compression_loss": 146.62538146972656, "distillation_loss": 6.339056968688965, "epoch": 1.55, "learning_rate": 3.8896268566598236e-05, "loss": 153.2743, "step": 1839, "task_loss": 2.3425607681274414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3669735751090755, "compression/movement_sparsity/importance_threshold": -0.00040474774400318936, "compression/movement_sparsity/linear_layer_sparsity": 0.8669722449258374, "compression/movement_sparsity/model_sparsity": 0.8371890811940468, "compression_loss": 146.7054443359375, "distillation_loss": 5.48922061920166, "epoch": 1.56, "learning_rate": 3.889023064847241e-05, "loss": 151.9742, "step": 1840, "task_loss": 2.817523241043091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3677279764724841, "compression/movement_sparsity/importance_threshold": -0.0004024523936608567, "compression/movement_sparsity/linear_layer_sparsity": 0.8674021946382837, "compression/movement_sparsity/model_sparsity": 0.8376042608111905, "compression_loss": 146.7852783203125, "distillation_loss": 7.308719635009766, "epoch": 1.56, "learning_rate": 3.888419273034658e-05, "loss": 153.5786, "step": 1841, "task_loss": 3.795900344848633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3684795202583941, "compression/movement_sparsity/importance_threshold": -0.00040016573781698206, "compression/movement_sparsity/linear_layer_sparsity": 0.8678654962476076, "compression/movement_sparsity/model_sparsity": 0.8380516465849516, "compression_loss": 146.86483764648438, "distillation_loss": 7.258925914764404, "epoch": 1.56, "learning_rate": 3.8878154812220744e-05, "loss": 153.6335, "step": 1842, "task_loss": 3.5141148567199707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3692282118891534, "compression/movement_sparsity/importance_threshold": -0.0003978877599734639, "compression/movement_sparsity/linear_layer_sparsity": 0.8683738592864273, "compression/movement_sparsity/model_sparsity": 0.8385425457894803, "compression_loss": 146.94386291503906, "distillation_loss": 8.211523056030273, "epoch": 1.56, "learning_rate": 3.887211689409492e-05, "loss": 153.0897, "step": 1843, "task_loss": 3.171069383621216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3699740567871104, "compression/movement_sparsity/importance_threshold": -0.00039561844363220425, "compression/movement_sparsity/linear_layer_sparsity": 0.8688186068909097, "compression/movement_sparsity/model_sparsity": 0.8389720149455451, "compression_loss": 147.02268981933594, "distillation_loss": 6.05572509765625, "epoch": 1.56, "learning_rate": 3.886607897596909e-05, "loss": 153.6951, "step": 1844, "task_loss": 2.3466174602508545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3707170603746128, "compression/movement_sparsity/importance_threshold": -0.0003933577722951033, "compression/movement_sparsity/linear_layer_sparsity": 0.8692269142390969, "compression/movement_sparsity/model_sparsity": 0.8393662956802217, "compression_loss": 147.1012420654297, "distillation_loss": 5.430069923400879, "epoch": 1.56, "learning_rate": 3.886004105784325e-05, "loss": 152.8183, "step": 1845, "task_loss": 3.2519054412841797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3714572280740096, "compression/movement_sparsity/importance_threshold": -0.00039110572946406044, "compression/movement_sparsity/linear_layer_sparsity": 0.86960018838277, "compression/movement_sparsity/model_sparsity": 0.839726746708734, "compression_loss": 147.17938232421875, "distillation_loss": 6.71021032333374, "epoch": 1.56, "learning_rate": 3.8854003139717427e-05, "loss": 154.1356, "step": 1846, "task_loss": 2.3991525173187256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3721945653076486, "compression/movement_sparsity/importance_threshold": -0.00038886229864097677, "compression/movement_sparsity/linear_layer_sparsity": 0.870217001726238, "compression/movement_sparsity/model_sparsity": 0.840322370616314, "compression_loss": 147.25733947753906, "distillation_loss": 6.815332889556885, "epoch": 1.56, "learning_rate": 3.88479652215916e-05, "loss": 153.8392, "step": 1847, "task_loss": 3.1035261154174805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.372929077497878, "compression/movement_sparsity/importance_threshold": -0.0003866274633277534, "compression/movement_sparsity/linear_layer_sparsity": 0.8706333459634117, "compression/movement_sparsity/model_sparsity": 0.8407244121481162, "compression_loss": 147.3348388671875, "distillation_loss": 6.4255852699279785, "epoch": 1.56, "learning_rate": 3.884192730346577e-05, "loss": 153.221, "step": 1848, "task_loss": 1.9447343349456787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3736607700670453, "compression/movement_sparsity/importance_threshold": -0.00038440120702629057, "compression/movement_sparsity/linear_layer_sparsity": 0.8710746951801179, "compression/movement_sparsity/model_sparsity": 0.8411505996614795, "compression_loss": 147.41209411621094, "distillation_loss": 4.901268482208252, "epoch": 1.56, "learning_rate": 3.8835889385339935e-05, "loss": 153.0275, "step": 1849, "task_loss": 3.0868444442749023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3743896484375, "compression/movement_sparsity/importance_threshold": -0.00038218351323848765, "compression/movement_sparsity/linear_layer_sparsity": 0.8713199991567229, "compression/movement_sparsity/model_sparsity": 0.8413874766918449, "compression_loss": 147.48895263671875, "distillation_loss": 7.811885833740234, "epoch": 1.56, "learning_rate": 3.882985146721411e-05, "loss": 153.5925, "step": 1850, "task_loss": 4.08037805557251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3751157180315896, "compression/movement_sparsity/importance_threshold": -0.00037997436546624574, "compression/movement_sparsity/linear_layer_sparsity": 0.8717392051941293, "compression/movement_sparsity/model_sparsity": 0.8417922817122377, "compression_loss": 147.5656280517578, "distillation_loss": 6.7906174659729, "epoch": 1.56, "learning_rate": 3.8823813549088276e-05, "loss": 153.3325, "step": 1851, "task_loss": 3.44189453125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.375838984271662, "compression/movement_sparsity/importance_threshold": -0.00037777374721146683, "compression/movement_sparsity/linear_layer_sparsity": 0.8721336328111884, "compression/movement_sparsity/model_sparsity": 0.8421731595272496, "compression_loss": 147.64190673828125, "distillation_loss": 6.649721145629883, "epoch": 1.57, "learning_rate": 3.881777563096244e-05, "loss": 153.2356, "step": 1852, "task_loss": 3.3841075897216797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.376559452580066, "compression/movement_sparsity/importance_threshold": -0.0003755816419760494, "compression/movement_sparsity/linear_layer_sparsity": 0.8725386967857786, "compression/movement_sparsity/model_sparsity": 0.8425643083081901, "compression_loss": 147.71788024902344, "distillation_loss": 5.944565773010254, "epoch": 1.57, "learning_rate": 3.881173771283662e-05, "loss": 154.516, "step": 1853, "task_loss": 3.51729154586792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3772771283791496, "compression/movement_sparsity/importance_threshold": -0.0003733980332618937, "compression/movement_sparsity/linear_layer_sparsity": 0.8729470279823011, "compression/movement_sparsity/model_sparsity": 0.8429586120719383, "compression_loss": 147.79364013671875, "distillation_loss": 6.437558174133301, "epoch": 1.57, "learning_rate": 3.8805699794710784e-05, "loss": 154.7225, "step": 1854, "task_loss": 3.6579341888427734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.377992017091261, "compression/movement_sparsity/importance_threshold": -0.00037122290457090087, "compression/movement_sparsity/linear_layer_sparsity": 0.8733794579216156, "compression/movement_sparsity/model_sparsity": 0.8433761867125273, "compression_loss": 147.8690185546875, "distillation_loss": 6.866792678833008, "epoch": 1.57, "learning_rate": 3.879966187658495e-05, "loss": 154.4009, "step": 1855, "task_loss": 3.614643096923828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3787041241387479, "compression/movement_sparsity/importance_threshold": -0.00036905623940497283, "compression/movement_sparsity/linear_layer_sparsity": 0.8736728997629666, "compression/movement_sparsity/model_sparsity": 0.8436595479238952, "compression_loss": 147.94430541992188, "distillation_loss": 5.848559856414795, "epoch": 1.57, "learning_rate": 3.8793623958459125e-05, "loss": 153.4799, "step": 1856, "task_loss": 2.795853853225708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3794134549439594, "compression/movement_sparsity/importance_threshold": -0.0003668980212660081, "compression/movement_sparsity/linear_layer_sparsity": 0.8739568380427118, "compression/movement_sparsity/model_sparsity": 0.8439337320502348, "compression_loss": 148.01904296875, "distillation_loss": 8.760519027709961, "epoch": 1.57, "learning_rate": 3.878758604033329e-05, "loss": 155.183, "step": 1857, "task_loss": 3.912856101989746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.380120014929243, "compression/movement_sparsity/importance_threshold": -0.00036474823365590693, "compression/movement_sparsity/linear_layer_sparsity": 0.8742087241598517, "compression/movement_sparsity/model_sparsity": 0.8441769651043587, "compression_loss": 148.0936279296875, "distillation_loss": 8.316190719604492, "epoch": 1.57, "learning_rate": 3.878154812220747e-05, "loss": 155.7073, "step": 1858, "task_loss": 4.517300605773926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3808238095169467, "compression/movement_sparsity/importance_threshold": -0.00036260686007657215, "compression/movement_sparsity/linear_layer_sparsity": 0.8745523190502792, "compression/movement_sparsity/model_sparsity": 0.8445087564532784, "compression_loss": 148.16793823242188, "distillation_loss": 5.306129455566406, "epoch": 1.57, "learning_rate": 3.8775510204081634e-05, "loss": 154.5206, "step": 1859, "task_loss": 3.934784412384033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.38152484412942, "compression/movement_sparsity/importance_threshold": -0.0003604738840299014, "compression/movement_sparsity/linear_layer_sparsity": 0.874915743831485, "compression/movement_sparsity/model_sparsity": 0.8448596964752244, "compression_loss": 148.24195861816406, "distillation_loss": 5.32120418548584, "epoch": 1.57, "learning_rate": 3.876947228595581e-05, "loss": 155.0777, "step": 1860, "task_loss": 3.496925115585327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3822231241890095, "compression/movement_sparsity/importance_threshold": -0.00035834928901779667, "compression/movement_sparsity/linear_layer_sparsity": 0.8752382329451969, "compression/movement_sparsity/model_sparsity": 0.8451711070957878, "compression_loss": 148.31578063964844, "distillation_loss": 6.774896621704102, "epoch": 1.57, "learning_rate": 3.8763434367829975e-05, "loss": 154.906, "step": 1861, "task_loss": 2.6619465351104736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3829186551180643, "compression/movement_sparsity/importance_threshold": -0.00035623305854215904, "compression/movement_sparsity/linear_layer_sparsity": 0.875502067078308, "compression/movement_sparsity/model_sparsity": 0.8454258777147778, "compression_loss": 148.3892822265625, "distillation_loss": 8.568330764770508, "epoch": 1.57, "learning_rate": 3.875739644970414e-05, "loss": 155.346, "step": 1862, "task_loss": 3.8568685054779053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3836114423389325, "compression/movement_sparsity/importance_threshold": -0.000354125176104887, "compression/movement_sparsity/linear_layer_sparsity": 0.8759798207788065, "compression/movement_sparsity/model_sparsity": 0.8458872191059219, "compression_loss": 148.4624481201172, "distillation_loss": 5.960892200469971, "epoch": 1.57, "learning_rate": 3.8751358531578316e-05, "loss": 155.2434, "step": 1863, "task_loss": 2.86572527885437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3843014912739622, "compression/movement_sparsity/importance_threshold": -0.0003520256252078826, "compression/movement_sparsity/linear_layer_sparsity": 0.8763086297013655, "compression/movement_sparsity/model_sparsity": 0.8462047324304564, "compression_loss": 148.53521728515625, "distillation_loss": 7.4248948097229, "epoch": 1.58, "learning_rate": 3.874532061345248e-05, "loss": 155.5059, "step": 1864, "task_loss": 4.450930118560791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3849888073455014, "compression/movement_sparsity/importance_threshold": -0.0003499343893530451, "compression/movement_sparsity/linear_layer_sparsity": 0.8766158439563358, "compression/movement_sparsity/model_sparsity": 0.8465013929306668, "compression_loss": 148.6078643798828, "distillation_loss": 6.729351043701172, "epoch": 1.58, "learning_rate": 3.873928269532665e-05, "loss": 155.0728, "step": 1865, "task_loss": 3.897453784942627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3856733959758984, "compression/movement_sparsity/importance_threshold": -0.0003478514520422766, "compression/movement_sparsity/linear_layer_sparsity": 0.877021301428458, "compression/movement_sparsity/model_sparsity": 0.8468929216912886, "compression_loss": 148.68002319335938, "distillation_loss": 7.422763347625732, "epoch": 1.58, "learning_rate": 3.8733244777200824e-05, "loss": 154.481, "step": 1866, "task_loss": 2.692875862121582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3863552625875017, "compression/movement_sparsity/importance_threshold": -0.00034577679677747636, "compression/movement_sparsity/linear_layer_sparsity": 0.8774258288155047, "compression/movement_sparsity/model_sparsity": 0.8472835523181185, "compression_loss": 148.7519989013672, "distillation_loss": 7.419010639190674, "epoch": 1.58, "learning_rate": 3.872720685907499e-05, "loss": 155.4015, "step": 1867, "task_loss": 3.4218645095825195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3870344126026593, "compression/movement_sparsity/importance_threshold": -0.00034371040706054554, "compression/movement_sparsity/linear_layer_sparsity": 0.8778272797673013, "compression/movement_sparsity/model_sparsity": 0.8476712121947132, "compression_loss": 148.82383728027344, "distillation_loss": 7.416689872741699, "epoch": 1.58, "learning_rate": 3.8721168940949166e-05, "loss": 155.3377, "step": 1868, "task_loss": 2.8182859420776367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3877108514437193, "compression/movement_sparsity/importance_threshold": -0.0003416522663933835, "compression/movement_sparsity/linear_layer_sparsity": 0.8781502577718863, "compression/movement_sparsity/model_sparsity": 0.8479830949112441, "compression_loss": 148.89508056640625, "distillation_loss": 6.940967559814453, "epoch": 1.58, "learning_rate": 3.871513102282333e-05, "loss": 155.2136, "step": 1869, "task_loss": 3.554002285003662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3883845845330298, "compression/movement_sparsity/importance_threshold": -0.0003396023582778922, "compression/movement_sparsity/linear_layer_sparsity": 0.8784544432882772, "compression/movement_sparsity/model_sparsity": 0.8482768307193629, "compression_loss": 148.96621704101562, "distillation_loss": 7.293410301208496, "epoch": 1.58, "learning_rate": 3.87090931046975e-05, "loss": 155.2493, "step": 1870, "task_loss": 3.9629340171813965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3890556172929394, "compression/movement_sparsity/importance_threshold": -0.0003375606662159702, "compression/movement_sparsity/linear_layer_sparsity": 0.8786245176912675, "compression/movement_sparsity/model_sparsity": 0.8484410625434005, "compression_loss": 149.03701782226562, "distillation_loss": 6.5900654792785645, "epoch": 1.58, "learning_rate": 3.8703055186571674e-05, "loss": 155.2219, "step": 1871, "task_loss": 3.0699574947357178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3897239551457958, "compression/movement_sparsity/importance_threshold": -0.00033552717370952025, "compression/movement_sparsity/linear_layer_sparsity": 0.8789879782449762, "compression/movement_sparsity/model_sparsity": 0.8487920371089538, "compression_loss": 149.1076202392578, "distillation_loss": 6.195268154144287, "epoch": 1.58, "learning_rate": 3.869701726844584e-05, "loss": 155.1058, "step": 1872, "task_loss": 3.514054298400879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3903896035139476, "compression/movement_sparsity/importance_threshold": -0.00033350186426044006, "compression/movement_sparsity/linear_layer_sparsity": 0.8794589947907604, "compression/movement_sparsity/model_sparsity": 0.849246872787374, "compression_loss": 149.17779541015625, "distillation_loss": 7.014634609222412, "epoch": 1.58, "learning_rate": 3.869097935032001e-05, "loss": 156.168, "step": 1873, "task_loss": 3.4906058311462402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.391052567819743, "compression/movement_sparsity/importance_threshold": -0.00033148472137063244, "compression/movement_sparsity/linear_layer_sparsity": 0.8797602827344158, "compression/movement_sparsity/model_sparsity": 0.8495378105632947, "compression_loss": 149.24778747558594, "distillation_loss": 5.868443489074707, "epoch": 1.58, "learning_rate": 3.868494143219418e-05, "loss": 155.0878, "step": 1874, "task_loss": 3.1042256355285645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3917128534855299, "compression/movement_sparsity/importance_threshold": -0.0003294757285419968, "compression/movement_sparsity/linear_layer_sparsity": 0.8801181866260062, "compression/movement_sparsity/model_sparsity": 0.8498834193551678, "compression_loss": 149.31739807128906, "distillation_loss": 6.940214157104492, "epoch": 1.58, "learning_rate": 3.867890351406835e-05, "loss": 155.6364, "step": 1875, "task_loss": 3.999532699584961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3923704659336564, "compression/movement_sparsity/importance_threshold": -0.00032747486927643416, "compression/movement_sparsity/linear_layer_sparsity": 0.8805425916005019, "compression/movement_sparsity/model_sparsity": 0.8502932447131671, "compression_loss": 149.38674926757812, "distillation_loss": 6.561287879943848, "epoch": 1.59, "learning_rate": 3.867286559594252e-05, "loss": 155.3528, "step": 1876, "task_loss": 2.372581958770752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3930254105864712, "compression/movement_sparsity/importance_threshold": -0.00032548212707584484, "compression/movement_sparsity/linear_layer_sparsity": 0.8809120738588667, "compression/movement_sparsity/model_sparsity": 0.8506500341192967, "compression_loss": 149.45570373535156, "distillation_loss": 7.122910499572754, "epoch": 1.59, "learning_rate": 3.866682767781669e-05, "loss": 156.1486, "step": 1877, "task_loss": 2.7499966621398926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3936776928663221, "compression/movement_sparsity/importance_threshold": -0.00032349748544212905, "compression/movement_sparsity/linear_layer_sparsity": 0.8813811706136615, "compression/movement_sparsity/model_sparsity": 0.851103015957454, "compression_loss": 149.52439880371094, "distillation_loss": 5.738148212432861, "epoch": 1.59, "learning_rate": 3.8660789759690865e-05, "loss": 155.0254, "step": 1878, "task_loss": 3.077583074569702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3943273181955573, "compression/movement_sparsity/importance_threshold": -0.00032152092787718703, "compression/movement_sparsity/linear_layer_sparsity": 0.8817419720779875, "compression/movement_sparsity/model_sparsity": 0.8514514227815252, "compression_loss": 149.5928955078125, "distillation_loss": 7.894509315490723, "epoch": 1.59, "learning_rate": 3.865475184156503e-05, "loss": 156.6239, "step": 1879, "task_loss": 4.385012626647949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3949742919965253, "compression/movement_sparsity/importance_threshold": -0.0003195524378829199, "compression/movement_sparsity/linear_layer_sparsity": 0.8820942119899509, "compression/movement_sparsity/model_sparsity": 0.851791562168896, "compression_loss": 149.6609649658203, "distillation_loss": 6.579346656799316, "epoch": 1.59, "learning_rate": 3.86487139234392e-05, "loss": 155.8185, "step": 1880, "task_loss": 2.5204596519470215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.395618619691574, "compression/movement_sparsity/importance_threshold": -0.00031759199896122785, "compression/movement_sparsity/linear_layer_sparsity": 0.882432739109133, "compression/movement_sparsity/model_sparsity": 0.8521184598401029, "compression_loss": 149.72869873046875, "distillation_loss": 5.737729072570801, "epoch": 1.59, "learning_rate": 3.864267600531337e-05, "loss": 155.3987, "step": 1881, "task_loss": 3.7331557273864746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3962603067030517, "compression/movement_sparsity/importance_threshold": -0.0003156395946140103, "compression/movement_sparsity/linear_layer_sparsity": 0.882787089598768, "compression/movement_sparsity/model_sparsity": 0.8524606373003093, "compression_loss": 149.79624938964844, "distillation_loss": 6.429390907287598, "epoch": 1.59, "learning_rate": 3.863663808718754e-05, "loss": 155.9553, "step": 1882, "task_loss": 2.23868989944458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3968993584533067, "compression/movement_sparsity/importance_threshold": -0.0003136952083431692, "compression/movement_sparsity/linear_layer_sparsity": 0.8832131401083974, "compression/movement_sparsity/model_sparsity": 0.8528720516642483, "compression_loss": 149.86355590820312, "distillation_loss": 6.766284465789795, "epoch": 1.59, "learning_rate": 3.863060016906171e-05, "loss": 155.9906, "step": 1883, "task_loss": 3.0568654537200928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3975357803646873, "compression/movement_sparsity/importance_threshold": -0.00031175882365060484, "compression/movement_sparsity/linear_layer_sparsity": 0.8834918079060475, "compression/movement_sparsity/model_sparsity": 0.8531411463657667, "compression_loss": 149.9305877685547, "distillation_loss": 6.789269924163818, "epoch": 1.59, "learning_rate": 3.862456225093588e-05, "loss": 156.1786, "step": 1884, "task_loss": 3.200939416885376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.398169577859541, "compression/movement_sparsity/importance_threshold": -0.0003098304240382165, "compression/movement_sparsity/linear_layer_sparsity": 0.8839675225738802, "compression/movement_sparsity/model_sparsity": 0.8536005187712898, "compression_loss": 149.99729919433594, "distillation_loss": 5.876088619232178, "epoch": 1.59, "learning_rate": 3.861852433281005e-05, "loss": 156.4167, "step": 1885, "task_loss": 3.2096548080444336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3988007563602167, "compression/movement_sparsity/importance_threshold": -0.00030790999300790624, "compression/movement_sparsity/linear_layer_sparsity": 0.8843559881071213, "compression/movement_sparsity/model_sparsity": 0.8539756393184045, "compression_loss": 150.06370544433594, "distillation_loss": 5.651353359222412, "epoch": 1.59, "learning_rate": 3.8612486414684216e-05, "loss": 155.6909, "step": 1886, "task_loss": 2.902829170227051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3994293212890625, "compression/movement_sparsity/importance_threshold": -0.0003059975140615734, "compression/movement_sparsity/linear_layer_sparsity": 0.8847507019042037, "compression/movement_sparsity/model_sparsity": 0.8543567934822753, "compression_loss": 150.1298065185547, "distillation_loss": 6.559399604797363, "epoch": 1.59, "learning_rate": 3.860644849655839e-05, "loss": 156.243, "step": 1887, "task_loss": 3.0456230640411377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4000552780684263, "compression/movement_sparsity/importance_threshold": -0.00030409297070111904, "compression/movement_sparsity/linear_layer_sparsity": 0.8850197945952423, "compression/movement_sparsity/model_sparsity": 0.8546166420115507, "compression_loss": 150.1956024169922, "distillation_loss": 6.62272310256958, "epoch": 1.6, "learning_rate": 3.8600410578432563e-05, "loss": 156.3165, "step": 1888, "task_loss": 2.7173609733581543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4006786321206566, "compression/movement_sparsity/importance_threshold": -0.00030219634642844346, "compression/movement_sparsity/linear_layer_sparsity": 0.8852501218172966, "compression/movement_sparsity/model_sparsity": 0.8548390567849581, "compression_loss": 150.26124572753906, "distillation_loss": 5.735922813415527, "epoch": 1.6, "learning_rate": 3.8594372660306724e-05, "loss": 156.0371, "step": 1889, "task_loss": 3.3753914833068848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4012993888681013, "compression/movement_sparsity/importance_threshold": -0.000300307624745446, "compression/movement_sparsity/linear_layer_sparsity": 0.8854829769628897, "compression/movement_sparsity/model_sparsity": 0.8550639126399538, "compression_loss": 150.32656860351562, "distillation_loss": 6.453309059143066, "epoch": 1.6, "learning_rate": 3.85883347421809e-05, "loss": 156.4494, "step": 1890, "task_loss": 3.215193748474121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4019175537331088, "compression/movement_sparsity/importance_threshold": -0.00029842678915402866, "compression/movement_sparsity/linear_layer_sparsity": 0.8857196716904616, "compression/movement_sparsity/model_sparsity": 0.8552924761754755, "compression_loss": 150.39154052734375, "distillation_loss": 6.791268348693848, "epoch": 1.6, "learning_rate": 3.858229682405507e-05, "loss": 156.3278, "step": 1891, "task_loss": 3.4626686573028564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4025331321380274, "compression/movement_sparsity/importance_threshold": -0.00029655382315609165, "compression/movement_sparsity/linear_layer_sparsity": 0.8860986455862646, "compression/movement_sparsity/model_sparsity": 0.8556584311520976, "compression_loss": 150.4563446044922, "distillation_loss": 6.4242658615112305, "epoch": 1.6, "learning_rate": 3.857625890592924e-05, "loss": 157.1079, "step": 1892, "task_loss": 2.8037991523742676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.403146129505205, "compression/movement_sparsity/importance_threshold": -0.0002946887102535352, "compression/movement_sparsity/linear_layer_sparsity": 0.8864198230415365, "compression/movement_sparsity/model_sparsity": 0.8559685751737235, "compression_loss": 150.5208282470703, "distillation_loss": 6.069797515869141, "epoch": 1.6, "learning_rate": 3.8570220987803406e-05, "loss": 156.7057, "step": 1893, "task_loss": 3.1066486835479736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.40375655125699, "compression/movement_sparsity/importance_threshold": -0.0002928314339482596, "compression/movement_sparsity/linear_layer_sparsity": 0.8867824966001813, "compression/movement_sparsity/model_sparsity": 0.8563187897799145, "compression_loss": 150.58505249023438, "distillation_loss": 5.87023401260376, "epoch": 1.6, "learning_rate": 3.856418306967758e-05, "loss": 156.1895, "step": 1894, "task_loss": 3.146287679672241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4043644028157305, "compression/movement_sparsity/importance_threshold": -0.00029098197774216506, "compression/movement_sparsity/linear_layer_sparsity": 0.8870396055027459, "compression/movement_sparsity/model_sparsity": 0.8565670662007164, "compression_loss": 150.6488494873047, "distillation_loss": 5.058915615081787, "epoch": 1.6, "learning_rate": 3.855814515155175e-05, "loss": 157.0146, "step": 1895, "task_loss": 3.167494773864746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4049696896037747, "compression/movement_sparsity/importance_threshold": -0.0002891403251371527, "compression/movement_sparsity/linear_layer_sparsity": 0.8873788599961537, "compression/movement_sparsity/model_sparsity": 0.8568946662586069, "compression_loss": 150.7125244140625, "distillation_loss": 5.118819236755371, "epoch": 1.6, "learning_rate": 3.8552107233425914e-05, "loss": 156.6973, "step": 1896, "task_loss": 2.721879005432129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.405572417043471, "compression/movement_sparsity/importance_threshold": -0.0002873064596351227, "compression/movement_sparsity/linear_layer_sparsity": 0.8877576192569393, "compression/movement_sparsity/model_sparsity": 0.8572604139735847, "compression_loss": 150.77581787109375, "distillation_loss": 5.839515686035156, "epoch": 1.6, "learning_rate": 3.854606931530009e-05, "loss": 156.4697, "step": 1897, "task_loss": 3.584444522857666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4061725905571674, "compression/movement_sparsity/importance_threshold": -0.0002854803647379762, "compression/movement_sparsity/linear_layer_sparsity": 0.8880939642534441, "compression/movement_sparsity/model_sparsity": 0.8575852044847413, "compression_loss": 150.83892822265625, "distillation_loss": 6.333454132080078, "epoch": 1.6, "learning_rate": 3.8540031397174256e-05, "loss": 156.2799, "step": 1898, "task_loss": 2.3072192668914795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4067702155672117, "compression/movement_sparsity/importance_threshold": -0.0002836620239476126, "compression/movement_sparsity/linear_layer_sparsity": 0.8883948586995675, "compression/movement_sparsity/model_sparsity": 0.8578757622809807, "compression_loss": 150.90158081054688, "distillation_loss": 6.045659065246582, "epoch": 1.6, "learning_rate": 3.853399347904842e-05, "loss": 157.1078, "step": 1899, "task_loss": 2.536705255508423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4073652974959527, "compression/movement_sparsity/importance_threshold": -0.00028185142076593214, "compression/movement_sparsity/linear_layer_sparsity": 0.8888079595631443, "compression/movement_sparsity/model_sparsity": 0.8582746718590468, "compression_loss": 150.9640655517578, "distillation_loss": 5.64579963684082, "epoch": 1.61, "learning_rate": 3.85279555609226e-05, "loss": 156.3829, "step": 1900, "task_loss": 3.3389205932617188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4079578417657388, "compression/movement_sparsity/importance_threshold": -0.0002800485386948359, "compression/movement_sparsity/linear_layer_sparsity": 0.8892509066183137, "compression/movement_sparsity/model_sparsity": 0.8587024023202066, "compression_loss": 151.02621459960938, "distillation_loss": 6.069151878356934, "epoch": 1.61, "learning_rate": 3.852191764279677e-05, "loss": 156.895, "step": 1901, "task_loss": 3.4109585285186768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4085478537989173, "compression/movement_sparsity/importance_threshold": -0.00027825336123622497, "compression/movement_sparsity/linear_layer_sparsity": 0.8894739243256072, "compression/movement_sparsity/model_sparsity": 0.8589177586831719, "compression_loss": 151.0880889892578, "distillation_loss": 9.036394119262695, "epoch": 1.61, "learning_rate": 3.851587972467093e-05, "loss": 158.418, "step": 1902, "task_loss": 3.5200376510620117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4091353390178372, "compression/movement_sparsity/importance_threshold": -0.00027646587189199875, "compression/movement_sparsity/linear_layer_sparsity": 0.8897052889502459, "compression/movement_sparsity/model_sparsity": 0.8591411752211934, "compression_loss": 151.1497802734375, "distillation_loss": 6.922142028808594, "epoch": 1.61, "learning_rate": 3.8509841806545105e-05, "loss": 156.95, "step": 1903, "task_loss": 4.287223815917969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4097203028448462, "compression/movement_sparsity/importance_threshold": -0.0002746860541640592, "compression/movement_sparsity/linear_layer_sparsity": 0.890032058840139, "compression/movement_sparsity/model_sparsity": 0.8594567195601069, "compression_loss": 151.21107482910156, "distillation_loss": 7.750855922698975, "epoch": 1.61, "learning_rate": 3.850380388841928e-05, "loss": 157.382, "step": 1904, "task_loss": 3.9015185832977295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.410302750702293, "compression/movement_sparsity/importance_threshold": -0.00027291389155430397, "compression/movement_sparsity/linear_layer_sparsity": 0.8901319118199218, "compression/movement_sparsity/model_sparsity": 0.8595531422828504, "compression_loss": 151.2722930908203, "distillation_loss": 5.996115684509277, "epoch": 1.61, "learning_rate": 3.849776597029344e-05, "loss": 157.5897, "step": 1905, "task_loss": 3.646399974822998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4108826880125251, "compression/movement_sparsity/importance_threshold": -0.00027114936756463676, "compression/movement_sparsity/linear_layer_sparsity": 0.8905521314115772, "compression/movement_sparsity/model_sparsity": 0.8599589260387859, "compression_loss": 151.33309936523438, "distillation_loss": 5.609531402587891, "epoch": 1.61, "learning_rate": 3.8491728052167613e-05, "loss": 156.9539, "step": 1906, "task_loss": 2.981043815612793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4114601201978914, "compression/movement_sparsity/importance_threshold": -0.0002693924656969552, "compression/movement_sparsity/linear_layer_sparsity": 0.8908761110462436, "compression/movement_sparsity/model_sparsity": 0.8602717759763235, "compression_loss": 151.39352416992188, "distillation_loss": 8.969968795776367, "epoch": 1.61, "learning_rate": 3.848569013404179e-05, "loss": 158.0768, "step": 1907, "task_loss": 3.391901731491089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4120350526807395, "compression/movement_sparsity/importance_threshold": -0.00026764316945316214, "compression/movement_sparsity/linear_layer_sparsity": 0.891250816090033, "compression/movement_sparsity/model_sparsity": 0.8606336087491312, "compression_loss": 151.4539337158203, "distillation_loss": 8.696922302246094, "epoch": 1.61, "learning_rate": 3.8479652215915955e-05, "loss": 157.4791, "step": 1908, "task_loss": 3.6983954906463623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.412607490883418, "compression/movement_sparsity/importance_threshold": -0.0002659014623351561, "compression/movement_sparsity/linear_layer_sparsity": 0.8915811870785523, "compression/movement_sparsity/model_sparsity": 0.8609526304778548, "compression_loss": 151.513916015625, "distillation_loss": 6.751202583312988, "epoch": 1.61, "learning_rate": 3.847361429779012e-05, "loss": 157.8596, "step": 1909, "task_loss": 2.997176170349121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.413177440228275, "compression/movement_sparsity/importance_threshold": -0.000264167327844839, "compression/movement_sparsity/linear_layer_sparsity": 0.8919303147827626, "compression/movement_sparsity/model_sparsity": 0.8612897645713831, "compression_loss": 151.57373046875, "distillation_loss": 5.813370704650879, "epoch": 1.61, "learning_rate": 3.8467576379664296e-05, "loss": 157.6691, "step": 1910, "task_loss": 2.9291272163391113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4137449061376586, "compression/movement_sparsity/importance_threshold": -0.00026244074948411113, "compression/movement_sparsity/linear_layer_sparsity": 0.892104872672784, "compression/movement_sparsity/model_sparsity": 0.8614583258608793, "compression_loss": 151.6332244873047, "distillation_loss": 7.464130401611328, "epoch": 1.61, "learning_rate": 3.846153846153846e-05, "loss": 158.2357, "step": 1911, "task_loss": 3.74951171875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4143098940339172, "compression/movement_sparsity/importance_threshold": -0.00026072171075487184, "compression/movement_sparsity/linear_layer_sparsity": 0.892312651293839, "compression/movement_sparsity/model_sparsity": 0.8616589666470993, "compression_loss": 151.69247436523438, "distillation_loss": 5.525993824005127, "epoch": 1.62, "learning_rate": 3.845550054341263e-05, "loss": 157.0393, "step": 1912, "task_loss": 2.8973495960235596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4148724093393985, "compression/movement_sparsity/importance_threshold": -0.00025901019515902225, "compression/movement_sparsity/linear_layer_sparsity": 0.892653539398213, "compression/movement_sparsity/model_sparsity": 0.8619881441963936, "compression_loss": 151.7513885498047, "distillation_loss": 6.149102210998535, "epoch": 1.62, "learning_rate": 3.8449462625286804e-05, "loss": 158.2012, "step": 1913, "task_loss": 3.071350574493408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4154324574764514, "compression/movement_sparsity/importance_threshold": -0.0002573061861984626, "compression/movement_sparsity/linear_layer_sparsity": 0.8929730116975131, "compression/movement_sparsity/model_sparsity": 0.8622966416394009, "compression_loss": 151.8101806640625, "distillation_loss": 6.122520446777344, "epoch": 1.62, "learning_rate": 3.844342470716097e-05, "loss": 157.5365, "step": 1914, "task_loss": 3.0856828689575195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4159900438674236, "compression/movement_sparsity/importance_threshold": -0.0002556096673750948, "compression/movement_sparsity/linear_layer_sparsity": 0.8933443779743644, "compression/movement_sparsity/model_sparsity": 0.862655250342186, "compression_loss": 151.86865234375, "distillation_loss": 7.150974273681641, "epoch": 1.62, "learning_rate": 3.843738678903514e-05, "loss": 158.3788, "step": 1915, "task_loss": 3.0702965259552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4165451739346633, "compression/movement_sparsity/importance_threshold": -0.00025392062219081746, "compression/movement_sparsity/linear_layer_sparsity": 0.8936192896592092, "compression/movement_sparsity/model_sparsity": 0.8629207179649292, "compression_loss": 151.92684936523438, "distillation_loss": 7.095952033996582, "epoch": 1.62, "learning_rate": 3.843134887090931e-05, "loss": 157.9278, "step": 1916, "task_loss": 3.2225875854492188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.417097853100519, "compression/movement_sparsity/importance_threshold": -0.0002522390341475316, "compression/movement_sparsity/linear_layer_sparsity": 0.8938943802065685, "compression/movement_sparsity/model_sparsity": 0.8631863583057093, "compression_loss": 151.98477172851562, "distillation_loss": 6.597002029418945, "epoch": 1.62, "learning_rate": 3.8425310952783486e-05, "loss": 158.3976, "step": 1917, "task_loss": 3.2087225914001465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4176480867873387, "compression/movement_sparsity/importance_threshold": -0.0002505648867471375, "compression/movement_sparsity/linear_layer_sparsity": 0.8940815180934458, "compression/movement_sparsity/model_sparsity": 0.8633670674304688, "compression_loss": 152.04229736328125, "distillation_loss": 7.084159851074219, "epoch": 1.62, "learning_rate": 3.8419273034657653e-05, "loss": 158.5595, "step": 1918, "task_loss": 3.503180503845215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4181958804174704, "compression/movement_sparsity/importance_threshold": -0.0002488981634915363, "compression/movement_sparsity/linear_layer_sparsity": 0.8944814904484556, "compression/movement_sparsity/model_sparsity": 0.8637532995046251, "compression_loss": 152.09979248046875, "distillation_loss": 7.364809036254883, "epoch": 1.62, "learning_rate": 3.841323511653182e-05, "loss": 159.6463, "step": 1919, "task_loss": 2.637944221496582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4187412394132628, "compression/movement_sparsity/importance_threshold": -0.00024723884788262814, "compression/movement_sparsity/linear_layer_sparsity": 0.894842733106984, "compression/movement_sparsity/model_sparsity": 0.8641021323665207, "compression_loss": 152.15696716308594, "distillation_loss": 5.917118549346924, "epoch": 1.62, "learning_rate": 3.8407197198405995e-05, "loss": 158.3999, "step": 1920, "task_loss": 2.311518430709839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4192841691970637, "compression/movement_sparsity/importance_threshold": -0.00024558692342231334, "compression/movement_sparsity/linear_layer_sparsity": 0.895067145941891, "compression/movement_sparsity/model_sparsity": 0.864318835930174, "compression_loss": 152.2137908935547, "distillation_loss": 5.295403003692627, "epoch": 1.62, "learning_rate": 3.840115928028016e-05, "loss": 158.3144, "step": 1921, "task_loss": 3.3356878757476807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4198246751912214, "compression/movement_sparsity/importance_threshold": -0.0002439423736124921, "compression/movement_sparsity/linear_layer_sparsity": 0.8952257492956156, "compression/movement_sparsity/model_sparsity": 0.8644719907707772, "compression_loss": 152.27037048339844, "distillation_loss": 6.522974014282227, "epoch": 1.62, "learning_rate": 3.839512136215433e-05, "loss": 158.8991, "step": 1922, "task_loss": 2.797461748123169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4203627628180842, "compression/movement_sparsity/importance_threshold": -0.00024230518195506555, "compression/movement_sparsity/linear_layer_sparsity": 0.8956119492370059, "compression/movement_sparsity/model_sparsity": 0.8648449235560908, "compression_loss": 152.32669067382812, "distillation_loss": 7.050638198852539, "epoch": 1.63, "learning_rate": 3.83890834440285e-05, "loss": 158.3056, "step": 1923, "task_loss": 3.2105298042297363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4208984375, "compression/movement_sparsity/importance_threshold": -0.0002406753319519339, "compression/movement_sparsity/linear_layer_sparsity": 0.895969829280261, "compression/movement_sparsity/model_sparsity": 0.8651905093188923, "compression_loss": 152.38287353515625, "distillation_loss": 7.551097869873047, "epoch": 1.63, "learning_rate": 3.838304552590267e-05, "loss": 159.6248, "step": 1924, "task_loss": 3.4849605560302734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4214317046593172, "compression/movement_sparsity/importance_threshold": -0.00023905280710499743, "compression/movement_sparsity/linear_layer_sparsity": 0.8963923979329408, "compression/movement_sparsity/model_sparsity": 0.8655985614383792, "compression_loss": 152.43861389160156, "distillation_loss": 6.809576034545898, "epoch": 1.63, "learning_rate": 3.837700760777684e-05, "loss": 159.0645, "step": 1925, "task_loss": 3.5584347248077393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4219625697183842, "compression/movement_sparsity/importance_threshold": -0.00023743759091615722, "compression/movement_sparsity/linear_layer_sparsity": 0.8966419230648889, "compression/movement_sparsity/model_sparsity": 0.8658395146144158, "compression_loss": 152.49424743652344, "distillation_loss": 7.455321788787842, "epoch": 1.63, "learning_rate": 3.837096968965101e-05, "loss": 159.3402, "step": 1926, "task_loss": 3.625943422317505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4224910380995488, "compression/movement_sparsity/importance_threshold": -0.00023582966688731177, "compression/movement_sparsity/linear_layer_sparsity": 0.8968738242770711, "compression/movement_sparsity/model_sparsity": 0.866063449306548, "compression_loss": 152.5496063232422, "distillation_loss": 6.751021862030029, "epoch": 1.63, "learning_rate": 3.836493177152518e-05, "loss": 158.7307, "step": 1927, "task_loss": 2.8171238899230957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4230171152251594, "compression/movement_sparsity/importance_threshold": -0.00023422901852036392, "compression/movement_sparsity/linear_layer_sparsity": 0.8971317917197054, "compression/movement_sparsity/model_sparsity": 0.8663125547739271, "compression_loss": 152.60470581054688, "distillation_loss": 4.98211145401001, "epoch": 1.63, "learning_rate": 3.835889385339935e-05, "loss": 158.6095, "step": 1928, "task_loss": 3.651984214782715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4235408065175643, "compression/movement_sparsity/importance_threshold": -0.00023263562931721306, "compression/movement_sparsity/linear_layer_sparsity": 0.8974040681635027, "compression/movement_sparsity/model_sparsity": 0.8665754776842597, "compression_loss": 152.65965270996094, "distillation_loss": 6.469775676727295, "epoch": 1.63, "learning_rate": 3.835285593527352e-05, "loss": 159.7873, "step": 1929, "task_loss": 2.6886167526245117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4240621173991113, "compression/movement_sparsity/importance_threshold": -0.00023104948277976028, "compression/movement_sparsity/linear_layer_sparsity": 0.8976850969463447, "compression/movement_sparsity/model_sparsity": 0.8668468522638654, "compression_loss": 152.71426391601562, "distillation_loss": 6.927318096160889, "epoch": 1.63, "learning_rate": 3.834681801714769e-05, "loss": 158.5927, "step": 1930, "task_loss": 3.365535259246826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.424581053292149, "compression/movement_sparsity/importance_threshold": -0.00022947056240990495, "compression/movement_sparsity/linear_layer_sparsity": 0.8979098794304483, "compression/movement_sparsity/model_sparsity": 0.8670639127781283, "compression_loss": 152.7686309814453, "distillation_loss": 5.36340856552124, "epoch": 1.63, "learning_rate": 3.834078009902186e-05, "loss": 158.8465, "step": 1931, "task_loss": 3.8297770023345947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4250976196190255, "compression/movement_sparsity/importance_threshold": -0.0002278988517095482, "compression/movement_sparsity/linear_layer_sparsity": 0.8982550960076742, "compression/movement_sparsity/model_sparsity": 0.867397270103916, "compression_loss": 152.82275390625, "distillation_loss": 7.523814678192139, "epoch": 1.63, "learning_rate": 3.833474218089603e-05, "loss": 159.0863, "step": 1932, "task_loss": 3.859551191329956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.425611821802089, "compression/movement_sparsity/importance_threshold": -0.00022633433418059023, "compression/movement_sparsity/linear_layer_sparsity": 0.8984958449522423, "compression/movement_sparsity/model_sparsity": 0.8676297485816078, "compression_loss": 152.87664794921875, "distillation_loss": 7.889961242675781, "epoch": 1.63, "learning_rate": 3.83287042627702e-05, "loss": 159.1737, "step": 1933, "task_loss": 3.9454383850097656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4261236652636875, "compression/movement_sparsity/importance_threshold": -0.00022477699332493217, "compression/movement_sparsity/linear_layer_sparsity": 0.8987760748158526, "compression/movement_sparsity/model_sparsity": 0.8679003516873153, "compression_loss": 152.93026733398438, "distillation_loss": 7.234272003173828, "epoch": 1.63, "learning_rate": 3.832266634464437e-05, "loss": 159.5948, "step": 1934, "task_loss": 3.3416173458099365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4266331554261695, "compression/movement_sparsity/importance_threshold": -0.0002232268126444734, "compression/movement_sparsity/linear_layer_sparsity": 0.8990791871571563, "compression/movement_sparsity/model_sparsity": 0.8681930511872126, "compression_loss": 152.98362731933594, "distillation_loss": 7.496713638305664, "epoch": 1.64, "learning_rate": 3.8316628426518536e-05, "loss": 160.0006, "step": 1935, "task_loss": 4.045901775360107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4271402977118828, "compression/movement_sparsity/importance_threshold": -0.00022168377564111587, "compression/movement_sparsity/linear_layer_sparsity": 0.8992916996727627, "compression/movement_sparsity/model_sparsity": 0.8683982632441428, "compression_loss": 153.0367889404297, "distillation_loss": 7.040009498596191, "epoch": 1.64, "learning_rate": 3.831059050839271e-05, "loss": 159.8056, "step": 1936, "task_loss": 3.3511879444122314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.427645097543176, "compression/movement_sparsity/importance_threshold": -0.0002201478658167581, "compression/movement_sparsity/linear_layer_sparsity": 0.8995876694376524, "compression/movement_sparsity/model_sparsity": 0.8686840655370992, "compression_loss": 153.0896759033203, "distillation_loss": 7.740026473999023, "epoch": 1.64, "learning_rate": 3.830455259026688e-05, "loss": 159.3719, "step": 1937, "task_loss": 3.3991854190826416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.428147560342397, "compression/movement_sparsity/importance_threshold": -0.00021861906667330292, "compression/movement_sparsity/linear_layer_sparsity": 0.8998162557312319, "compression/movement_sparsity/model_sparsity": 0.8689047991882805, "compression_loss": 153.14227294921875, "distillation_loss": 7.398360252380371, "epoch": 1.64, "learning_rate": 3.829851467214105e-05, "loss": 159.4896, "step": 1938, "task_loss": 2.395493984222412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4286476915318942, "compression/movement_sparsity/importance_threshold": -0.00021709736171264798, "compression/movement_sparsity/linear_layer_sparsity": 0.9000546317664405, "compression/movement_sparsity/model_sparsity": 0.8691349862733492, "compression_loss": 153.19480895996094, "distillation_loss": 6.806671619415283, "epoch": 1.64, "learning_rate": 3.829247675401522e-05, "loss": 160.3127, "step": 1939, "task_loss": 2.657777786254883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4291454965340158, "compression/movement_sparsity/importance_threshold": -0.0002155827344366961, "compression/movement_sparsity/linear_layer_sparsity": 0.9003250122675837, "compression/movement_sparsity/model_sparsity": 0.8693960783724903, "compression_loss": 153.2468719482422, "distillation_loss": 6.341867446899414, "epoch": 1.64, "learning_rate": 3.8286438835889386e-05, "loss": 158.6906, "step": 1940, "task_loss": 3.045822858810425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.42964098077111, "compression/movement_sparsity/importance_threshold": -0.00021407516834734668, "compression/movement_sparsity/linear_layer_sparsity": 0.9006473940637869, "compression/movement_sparsity/model_sparsity": 0.8697073853622316, "compression_loss": 153.2987060546875, "distillation_loss": 5.51707649230957, "epoch": 1.64, "learning_rate": 3.828040091776356e-05, "loss": 159.9611, "step": 1941, "task_loss": 2.703279972076416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4301341496655244, "compression/movement_sparsity/importance_threshold": -0.0002125746469465008, "compression/movement_sparsity/linear_layer_sparsity": 0.9010875508637295, "compression/movement_sparsity/model_sparsity": 0.8701324214220154, "compression_loss": 153.35035705566406, "distillation_loss": 6.027688980102539, "epoch": 1.64, "learning_rate": 3.827436299963773e-05, "loss": 159.7969, "step": 1942, "task_loss": 2.8079710006713867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4306250086396082, "compression/movement_sparsity/importance_threshold": -0.000211081153736057, "compression/movement_sparsity/linear_layer_sparsity": 0.9012942920822, "compression/movement_sparsity/model_sparsity": 0.8703320604436212, "compression_loss": 153.4016876220703, "distillation_loss": 8.256545066833496, "epoch": 1.64, "learning_rate": 3.8268325081511894e-05, "loss": 160.3008, "step": 1943, "task_loss": 3.6278116703033447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4311135631157088, "compression/movement_sparsity/importance_threshold": -0.00020959467221791893, "compression/movement_sparsity/linear_layer_sparsity": 0.9014992446755252, "compression/movement_sparsity/model_sparsity": 0.8705299722848577, "compression_loss": 153.4528350830078, "distillation_loss": 6.8891825675964355, "epoch": 1.64, "learning_rate": 3.826228716338607e-05, "loss": 160.1271, "step": 1944, "task_loss": 3.0895731449127197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.431599818516175, "compression/movement_sparsity/importance_threshold": -0.00020811518589398427, "compression/movement_sparsity/linear_layer_sparsity": 0.9017244922021668, "compression/movement_sparsity/model_sparsity": 0.8707474818660167, "compression_loss": 153.5037841796875, "distillation_loss": 6.901221752166748, "epoch": 1.64, "learning_rate": 3.8256249245260235e-05, "loss": 159.4664, "step": 1945, "task_loss": 3.3441033363342285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4320837802633544, "compression/movement_sparsity/importance_threshold": -0.00020664267826615498, "compression/movement_sparsity/linear_layer_sparsity": 0.9019950754141597, "compression/movement_sparsity/model_sparsity": 0.8710087697122664, "compression_loss": 153.5543975830078, "distillation_loss": 5.953382968902588, "epoch": 1.64, "learning_rate": 3.82502113271344e-05, "loss": 160.2562, "step": 1946, "task_loss": 4.020930290222168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4325654537795955, "compression/movement_sparsity/importance_threshold": -0.00020517713283633218, "compression/movement_sparsity/linear_layer_sparsity": 0.9022372433346765, "compression/movement_sparsity/model_sparsity": 0.8712426184197177, "compression_loss": 153.60484313964844, "distillation_loss": 7.471131324768066, "epoch": 1.65, "learning_rate": 3.8244173409008576e-05, "loss": 160.126, "step": 1947, "task_loss": 3.3768246173858643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4330448444872466, "compression/movement_sparsity/importance_threshold": -0.00020371853310641348, "compression/movement_sparsity/linear_layer_sparsity": 0.9024447596240434, "compression/movement_sparsity/model_sparsity": 0.8714430058861501, "compression_loss": 153.65501403808594, "distillation_loss": 7.707447052001953, "epoch": 1.65, "learning_rate": 3.8238135490882743e-05, "loss": 160.0766, "step": 1948, "task_loss": 4.069027900695801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4335219578086558, "compression/movement_sparsity/importance_threshold": -0.00020226686257830175, "compression/movement_sparsity/linear_layer_sparsity": 0.9027659728518183, "compression/movement_sparsity/model_sparsity": 0.8717531844513834, "compression_loss": 153.70481872558594, "distillation_loss": 8.208759307861328, "epoch": 1.65, "learning_rate": 3.823209757275692e-05, "loss": 161.3708, "step": 1949, "task_loss": 3.846348524093628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.433996799166171, "compression/movement_sparsity/importance_threshold": -0.0002008221047538972, "compression/movement_sparsity/linear_layer_sparsity": 0.9030649355827847, "compression/movement_sparsity/model_sparsity": 0.8720418768928242, "compression_loss": 153.75440979003906, "distillation_loss": 9.980860710144043, "epoch": 1.65, "learning_rate": 3.8226059654631085e-05, "loss": 161.0352, "step": 1950, "task_loss": 4.253302097320557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.434469373982141, "compression/movement_sparsity/importance_threshold": -0.00019938424313509923, "compression/movement_sparsity/linear_layer_sparsity": 0.9032926871846296, "compression/movement_sparsity/model_sparsity": 0.8722618045264999, "compression_loss": 153.80381774902344, "distillation_loss": 6.548616409301758, "epoch": 1.65, "learning_rate": 3.822002173650526e-05, "loss": 160.1709, "step": 1951, "task_loss": 2.5666110515594482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4349396876789133, "compression/movement_sparsity/importance_threshold": -0.0001979532612238098, "compression/movement_sparsity/linear_layer_sparsity": 0.9035432020224915, "compression/movement_sparsity/model_sparsity": 0.8725037134090075, "compression_loss": 153.85301208496094, "distillation_loss": 8.077263832092285, "epoch": 1.65, "learning_rate": 3.8213983818379426e-05, "loss": 160.1771, "step": 1952, "task_loss": 2.981919527053833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4354077456788368, "compression/movement_sparsity/importance_threshold": -0.00019652914252192826, "compression/movement_sparsity/linear_layer_sparsity": 0.9037222553237115, "compression/movement_sparsity/model_sparsity": 0.8726766156784982, "compression_loss": 153.90185546875, "distillation_loss": 7.715006351470947, "epoch": 1.65, "learning_rate": 3.820794590025359e-05, "loss": 160.3011, "step": 1953, "task_loss": 3.968613386154175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.435873553404259, "compression/movement_sparsity/importance_threshold": -0.00019511187053135575, "compression/movement_sparsity/linear_layer_sparsity": 0.9040080180011051, "compression/movement_sparsity/model_sparsity": 0.8729525615288144, "compression_loss": 153.9506072998047, "distillation_loss": 6.49777889251709, "epoch": 1.65, "learning_rate": 3.820190798212777e-05, "loss": 160.8285, "step": 1954, "task_loss": 2.9812023639678955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.436337116277529, "compression/movement_sparsity/importance_threshold": -0.00019370142875399076, "compression/movement_sparsity/linear_layer_sparsity": 0.9043232214483914, "compression/movement_sparsity/model_sparsity": 0.8732569367680073, "compression_loss": 153.9990234375, "distillation_loss": 6.795435905456543, "epoch": 1.65, "learning_rate": 3.8195870064001934e-05, "loss": 160.6026, "step": 1955, "task_loss": 3.6901659965515137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4367984397209939, "compression/movement_sparsity/importance_threshold": -0.000192297800691737, "compression/movement_sparsity/linear_layer_sparsity": 0.9045080818192502, "compression/movement_sparsity/model_sparsity": 0.87343544661643, "compression_loss": 154.04721069335938, "distillation_loss": 9.469181060791016, "epoch": 1.65, "learning_rate": 3.81898321458761e-05, "loss": 160.9894, "step": 1956, "task_loss": 4.162032604217529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4372575291570024, "compression/movement_sparsity/importance_threshold": -0.00019090096984649297, "compression/movement_sparsity/linear_layer_sparsity": 0.9046436357569347, "compression/movement_sparsity/model_sparsity": 0.8735663438593424, "compression_loss": 154.0951385498047, "distillation_loss": 5.460579872131348, "epoch": 1.65, "learning_rate": 3.8183794227750275e-05, "loss": 159.9049, "step": 1957, "task_loss": 2.5839810371398926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.437714390007903, "compression/movement_sparsity/importance_threshold": -0.0001895109197201589, "compression/movement_sparsity/linear_layer_sparsity": 0.9049524120778654, "compression/movement_sparsity/model_sparsity": 0.873864512763742, "compression_loss": 154.1427459716797, "distillation_loss": 6.584769248962402, "epoch": 1.65, "learning_rate": 3.817775630962444e-05, "loss": 160.8876, "step": 1958, "task_loss": 3.030895233154297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4381690276960435, "compression/movement_sparsity/importance_threshold": -0.00018812763381463593, "compression/movement_sparsity/linear_layer_sparsity": 0.9052091751795684, "compression/movement_sparsity/model_sparsity": 0.8741124552630058, "compression_loss": 154.19012451171875, "distillation_loss": 7.80364465713501, "epoch": 1.66, "learning_rate": 3.817171839149861e-05, "loss": 161.1926, "step": 1959, "task_loss": 3.5214920043945312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4386214476437722, "compression/movement_sparsity/importance_threshold": -0.00018675109563182513, "compression/movement_sparsity/linear_layer_sparsity": 0.9055465933511604, "compression/movement_sparsity/model_sparsity": 0.8744382820823839, "compression_loss": 154.2372589111328, "distillation_loss": 9.102875709533691, "epoch": 1.66, "learning_rate": 3.8165680473372784e-05, "loss": 161.7429, "step": 1960, "task_loss": 4.296591758728027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4390716552734375, "compression/movement_sparsity/importance_threshold": -0.00018538128867362502, "compression/movement_sparsity/linear_layer_sparsity": 0.905795510350559, "compression/movement_sparsity/model_sparsity": 0.874678648017095, "compression_loss": 154.28424072265625, "distillation_loss": 6.282147407531738, "epoch": 1.66, "learning_rate": 3.815964255524696e-05, "loss": 161.0876, "step": 1961, "task_loss": 2.305163860321045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4395196560073873, "compression/movement_sparsity/importance_threshold": -0.00018401819644193757, "compression/movement_sparsity/linear_layer_sparsity": 0.9059984239112185, "compression/movement_sparsity/model_sparsity": 0.8748745908727107, "compression_loss": 154.3309783935547, "distillation_loss": 6.44364595413208, "epoch": 1.66, "learning_rate": 3.815360463712112e-05, "loss": 160.4177, "step": 1962, "task_loss": 2.2141098976135254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.43996545526797, "compression/movement_sparsity/importance_threshold": -0.00018266180243866302, "compression/movement_sparsity/linear_layer_sparsity": 0.9061760820848253, "compression/movement_sparsity/model_sparsity": 0.8750461459415136, "compression_loss": 154.37742614746094, "distillation_loss": 5.939295768737793, "epoch": 1.66, "learning_rate": 3.814756671899529e-05, "loss": 160.5975, "step": 1963, "task_loss": 2.339681625366211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4404090584775335, "compression/movement_sparsity/importance_threshold": -0.0001813120901657016, "compression/movement_sparsity/linear_layer_sparsity": 0.9064238662882985, "compression/movement_sparsity/model_sparsity": 0.8752854179953241, "compression_loss": 154.42367553710938, "distillation_loss": 7.142485618591309, "epoch": 1.66, "learning_rate": 3.8141528800869466e-05, "loss": 160.7727, "step": 1964, "task_loss": 3.8312253952026367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4408504710584262, "compression/movement_sparsity/importance_threshold": -0.0001799690431249553, "compression/movement_sparsity/linear_layer_sparsity": 0.9066003916659798, "compression/movement_sparsity/model_sparsity": 0.8754558791832265, "compression_loss": 154.4696502685547, "distillation_loss": 7.039237976074219, "epoch": 1.66, "learning_rate": 3.813549088274363e-05, "loss": 161.4191, "step": 1965, "task_loss": 3.5504560470581055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4412896984329964, "compression/movement_sparsity/importance_threshold": -0.00017863264481832088, "compression/movement_sparsity/linear_layer_sparsity": 0.9068281790403278, "compression/movement_sparsity/model_sparsity": 0.8756758413605097, "compression_loss": 154.51553344726562, "distillation_loss": 6.728089332580566, "epoch": 1.66, "learning_rate": 3.81294529646178e-05, "loss": 161.4745, "step": 1966, "task_loss": 3.701183795928955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4417267460235919, "compression/movement_sparsity/importance_threshold": -0.00017730287874770292, "compression/movement_sparsity/linear_layer_sparsity": 0.9069930902787318, "compression/movement_sparsity/model_sparsity": 0.8758350873905482, "compression_loss": 154.56114196777344, "distillation_loss": 7.8387908935546875, "epoch": 1.66, "learning_rate": 3.8123415046491974e-05, "loss": 160.938, "step": 1967, "task_loss": 4.294440269470215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4421616192525615, "compression/movement_sparsity/importance_threshold": -0.0001759797284149999, "compression/movement_sparsity/linear_layer_sparsity": 0.9072113740914738, "compression/movement_sparsity/model_sparsity": 0.876045872482803, "compression_loss": 154.60650634765625, "distillation_loss": 5.65949010848999, "epoch": 1.66, "learning_rate": 3.811737712836614e-05, "loss": 161.1363, "step": 1968, "task_loss": 2.6471495628356934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.442594323542253, "compression/movement_sparsity/importance_threshold": -0.00017466317732211124, "compression/movement_sparsity/linear_layer_sparsity": 0.9073294114269013, "compression/movement_sparsity/model_sparsity": 0.8761598548726331, "compression_loss": 154.65162658691406, "distillation_loss": 6.765289306640625, "epoch": 1.66, "learning_rate": 3.811133921024031e-05, "loss": 160.5359, "step": 1969, "task_loss": 3.3650200366973877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4430248643150148, "compression/movement_sparsity/importance_threshold": -0.00017335320897093887, "compression/movement_sparsity/linear_layer_sparsity": 0.9074776049822797, "compression/movement_sparsity/model_sparsity": 0.8763029575234877, "compression_loss": 154.6966094970703, "distillation_loss": 7.206822395324707, "epoch": 1.66, "learning_rate": 3.810530129211448e-05, "loss": 160.9106, "step": 1970, "task_loss": 3.7178566455841064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4434532469931947, "compression/movement_sparsity/importance_threshold": -0.0001720498068633839, "compression/movement_sparsity/linear_layer_sparsity": 0.9076354094167728, "compression/movement_sparsity/model_sparsity": 0.8764553408901927, "compression_loss": 154.7411651611328, "distillation_loss": 6.287899494171143, "epoch": 1.67, "learning_rate": 3.809926337398865e-05, "loss": 161.1329, "step": 1971, "task_loss": 3.068429470062256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4438794769991412, "compression/movement_sparsity/importance_threshold": -0.00017075295450134486, "compression/movement_sparsity/linear_layer_sparsity": 0.9077381122726205, "compression/movement_sparsity/model_sparsity": 0.876554515586991, "compression_loss": 154.78558349609375, "distillation_loss": 6.153408050537109, "epoch": 1.67, "learning_rate": 3.809322545586282e-05, "loss": 161.2242, "step": 1972, "task_loss": 2.855734348297119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4443035597552023, "compression/movement_sparsity/importance_threshold": -0.00016946263538672457, "compression/movement_sparsity/linear_layer_sparsity": 0.9078229765736848, "compression/movement_sparsity/model_sparsity": 0.8766364645382408, "compression_loss": 154.82969665527344, "distillation_loss": 6.8665056228637695, "epoch": 1.67, "learning_rate": 3.808718753773699e-05, "loss": 161.7839, "step": 1973, "task_loss": 4.93058967590332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4447255006837267, "compression/movement_sparsity/importance_threshold": -0.00016817883302142154, "compression/movement_sparsity/linear_layer_sparsity": 0.9080460062051461, "compression/movement_sparsity/model_sparsity": 0.8768518324157419, "compression_loss": 154.87353515625, "distillation_loss": 7.2077226638793945, "epoch": 1.67, "learning_rate": 3.8081149619611165e-05, "loss": 161.2269, "step": 1974, "task_loss": 3.149751901626587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4451453052070622, "compression/movement_sparsity/importance_threshold": -0.000166901530907336, "compression/movement_sparsity/linear_layer_sparsity": 0.9082528070444548, "compression/movement_sparsity/model_sparsity": 0.8770515290100266, "compression_loss": 154.9173583984375, "distillation_loss": 6.691270351409912, "epoch": 1.67, "learning_rate": 3.8075111701485325e-05, "loss": 160.9845, "step": 1975, "task_loss": 3.401458501815796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.445562978747557, "compression/movement_sparsity/importance_threshold": -0.00016563071254636994, "compression/movement_sparsity/linear_layer_sparsity": 0.9083947761843274, "compression/movement_sparsity/model_sparsity": 0.8771886210731964, "compression_loss": 154.96072387695312, "distillation_loss": 8.148401260375977, "epoch": 1.67, "learning_rate": 3.80690737833595e-05, "loss": 161.3526, "step": 1976, "task_loss": 4.160513877868652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.445978526727559, "compression/movement_sparsity/importance_threshold": -0.0001643663614404236, "compression/movement_sparsity/linear_layer_sparsity": 0.9086244475771618, "compression/movement_sparsity/model_sparsity": 0.8774104025471351, "compression_loss": 155.00384521484375, "distillation_loss": 7.9408063888549805, "epoch": 1.67, "learning_rate": 3.806303586523367e-05, "loss": 161.2826, "step": 1977, "task_loss": 3.8156015872955322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.446391954569417, "compression/movement_sparsity/importance_threshold": -0.00016310846109139632, "compression/movement_sparsity/linear_layer_sparsity": 0.9089127977989322, "compression/movement_sparsity/model_sparsity": 0.8776888470517187, "compression_loss": 155.04678344726562, "distillation_loss": 6.890384197235107, "epoch": 1.67, "learning_rate": 3.8056997947107834e-05, "loss": 162.2542, "step": 1978, "task_loss": 3.1062464714050293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4468032676954792, "compression/movement_sparsity/importance_threshold": -0.00016185699500118923, "compression/movement_sparsity/linear_layer_sparsity": 0.9091257515087411, "compression/movement_sparsity/model_sparsity": 0.8778944851464734, "compression_loss": 155.0895233154297, "distillation_loss": 6.170140266418457, "epoch": 1.67, "learning_rate": 3.805096002898201e-05, "loss": 160.7493, "step": 1979, "task_loss": 3.550039291381836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.447212471528093, "compression/movement_sparsity/importance_threshold": -0.00016061194667170257, "compression/movement_sparsity/linear_layer_sparsity": 0.9093343528973629, "compression/movement_sparsity/model_sparsity": 0.8780959204356631, "compression_loss": 155.13204956054688, "distillation_loss": 6.297365665435791, "epoch": 1.67, "learning_rate": 3.804492211085618e-05, "loss": 161.6006, "step": 1980, "task_loss": 2.8500585556030273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4476195714896074, "compression/movement_sparsity/importance_threshold": -0.00015937329960483745, "compression/movement_sparsity/linear_layer_sparsity": 0.9094703838017528, "compression/movement_sparsity/model_sparsity": 0.8782272782600071, "compression_loss": 155.1742401123047, "distillation_loss": 6.63515043258667, "epoch": 1.67, "learning_rate": 3.803888419273035e-05, "loss": 161.8307, "step": 1981, "task_loss": 3.8548760414123535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4480245730023702, "compression/movement_sparsity/importance_threshold": -0.0001581410373024941, "compression/movement_sparsity/linear_layer_sparsity": 0.9097322265988688, "compression/movement_sparsity/model_sparsity": 0.8784801259515195, "compression_loss": 155.21630859375, "distillation_loss": 7.911506652832031, "epoch": 1.67, "learning_rate": 3.8032846274604516e-05, "loss": 161.9871, "step": 1982, "task_loss": 3.444103717803955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.44842748148873, "compression/movement_sparsity/importance_threshold": -0.00015691514326657188, "compression/movement_sparsity/linear_layer_sparsity": 0.9098883258773898, "compression/movement_sparsity/model_sparsity": 0.8786308627396058, "compression_loss": 155.2582244873047, "distillation_loss": 7.340427875518799, "epoch": 1.68, "learning_rate": 3.802680835647869e-05, "loss": 161.4076, "step": 1983, "task_loss": 2.403489828109741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4488283023710342, "compression/movement_sparsity/importance_threshold": -0.00015569560099897278, "compression/movement_sparsity/linear_layer_sparsity": 0.9100581498728597, "compression/movement_sparsity/model_sparsity": 0.8787948527583916, "compression_loss": 155.29989624023438, "distillation_loss": 5.912631034851074, "epoch": 1.68, "learning_rate": 3.802077043835286e-05, "loss": 161.1805, "step": 1984, "task_loss": 3.3173036575317383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.449227041071632, "compression/movement_sparsity/importance_threshold": -0.00015448239400159703, "compression/movement_sparsity/linear_layer_sparsity": 0.9102638298404108, "compression/movement_sparsity/model_sparsity": 0.8789934669863118, "compression_loss": 155.3412628173828, "distillation_loss": 7.287107467651367, "epoch": 1.68, "learning_rate": 3.8014732520227024e-05, "loss": 161.688, "step": 1985, "task_loss": 4.141517162322998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4496237030128707, "compression/movement_sparsity/importance_threshold": -0.000153275505776344, "compression/movement_sparsity/linear_layer_sparsity": 0.9104080049512959, "compression/movement_sparsity/model_sparsity": 0.8791326892386035, "compression_loss": 155.38246154785156, "distillation_loss": 5.387873649597168, "epoch": 1.68, "learning_rate": 3.80086946021012e-05, "loss": 161.1354, "step": 1986, "task_loss": 2.9147231578826904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.450018293617099, "compression/movement_sparsity/importance_threshold": -0.00015207491982511482, "compression/movement_sparsity/linear_layer_sparsity": 0.9106325966487174, "compression/movement_sparsity/model_sparsity": 0.8793495655202936, "compression_loss": 155.42344665527344, "distillation_loss": 5.369961261749268, "epoch": 1.68, "learning_rate": 3.8002656683975365e-05, "loss": 162.2061, "step": 1987, "task_loss": 3.8838868141174316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4504108183066649, "compression/movement_sparsity/importance_threshold": -0.0001508806196498097, "compression/movement_sparsity/linear_layer_sparsity": 0.910857546071168, "compression/movement_sparsity/model_sparsity": 0.8795667872380577, "compression_loss": 155.46417236328125, "distillation_loss": 5.209074974060059, "epoch": 1.68, "learning_rate": 3.799661876584953e-05, "loss": 161.5251, "step": 1988, "task_loss": 2.7505199909210205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.450801282503917, "compression/movement_sparsity/importance_threshold": -0.00014969258875232976, "compression/movement_sparsity/linear_layer_sparsity": 0.9110679122365999, "compression/movement_sparsity/model_sparsity": 0.879769926678545, "compression_loss": 155.50448608398438, "distillation_loss": 8.447084426879883, "epoch": 1.68, "learning_rate": 3.7990580847723706e-05, "loss": 161.8272, "step": 1989, "task_loss": 4.0983171463012695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4511896916312026, "compression/movement_sparsity/importance_threshold": -0.0001485108106345761, "compression/movement_sparsity/linear_layer_sparsity": 0.9113143967058008, "compression/movement_sparsity/model_sparsity": 0.880007943647954, "compression_loss": 155.5447998046875, "distillation_loss": 6.900386810302734, "epoch": 1.68, "learning_rate": 3.798454292959788e-05, "loss": 162.2076, "step": 1990, "task_loss": 4.0427422523498535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4515760511108708, "compression/movement_sparsity/importance_threshold": -0.00014733526879844722, "compression/movement_sparsity/linear_layer_sparsity": 0.9114136892497047, "compression/movement_sparsity/model_sparsity": 0.8801038251875151, "compression_loss": 155.58474731445312, "distillation_loss": 7.763065338134766, "epoch": 1.68, "learning_rate": 3.797850501147205e-05, "loss": 161.4554, "step": 1991, "task_loss": 3.791189193725586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4519603663652696, "compression/movement_sparsity/importance_threshold": -0.00014616594674584336, "compression/movement_sparsity/linear_layer_sparsity": 0.9116688187404416, "compression/movement_sparsity/model_sparsity": 0.8803501901953751, "compression_loss": 155.62451171875, "distillation_loss": 6.4316630363464355, "epoch": 1.68, "learning_rate": 3.7972467093346215e-05, "loss": 161.9414, "step": 1992, "task_loss": 2.49525785446167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4523426428167467, "compression/movement_sparsity/importance_threshold": -0.00014500282797866824, "compression/movement_sparsity/linear_layer_sparsity": 0.9119114159309932, "compression/movement_sparsity/model_sparsity": 0.880584453426115, "compression_loss": 155.66416931152344, "distillation_loss": 5.468949317932129, "epoch": 1.68, "learning_rate": 3.796642917522039e-05, "loss": 162.2815, "step": 1993, "task_loss": 2.1942763328552246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.452722885887651, "compression/movement_sparsity/importance_threshold": -0.00014384589599881863, "compression/movement_sparsity/linear_layer_sparsity": 0.912071354791493, "compression/movement_sparsity/model_sparsity": 0.8807388978947271, "compression_loss": 155.7034912109375, "distillation_loss": 6.161131381988525, "epoch": 1.69, "learning_rate": 3.7960391257094556e-05, "loss": 161.9526, "step": 1994, "task_loss": 2.6343321800231934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4531011010003303, "compression/movement_sparsity/importance_threshold": -0.00014269513430819735, "compression/movement_sparsity/linear_layer_sparsity": 0.912288267324957, "compression/movement_sparsity/model_sparsity": 0.8809483588153657, "compression_loss": 155.74256896972656, "distillation_loss": 5.467075347900391, "epoch": 1.69, "learning_rate": 3.795435333896872e-05, "loss": 161.9772, "step": 1995, "task_loss": 2.2041709423065186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4534772935771327, "compression/movement_sparsity/importance_threshold": -0.0001415505264087038, "compression/movement_sparsity/linear_layer_sparsity": 0.9124986692628918, "compression/movement_sparsity/model_sparsity": 0.8811515327994603, "compression_loss": 155.781494140625, "distillation_loss": 6.376107215881348, "epoch": 1.69, "learning_rate": 3.79483154208429e-05, "loss": 161.4582, "step": 1996, "task_loss": 3.3445024490356445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4538514690404065, "compression/movement_sparsity/importance_threshold": -0.00014041205580223818, "compression/movement_sparsity/linear_layer_sparsity": 0.9127566247813584, "compression/movement_sparsity/model_sparsity": 0.8814006267523037, "compression_loss": 155.82015991210938, "distillation_loss": 5.690335273742676, "epoch": 1.69, "learning_rate": 3.7942277502717064e-05, "loss": 161.511, "step": 1997, "task_loss": 2.5769665241241455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4542236328125, "compression/movement_sparsity/importance_threshold": -0.0001392797059907025, "compression/movement_sparsity/linear_layer_sparsity": 0.9129653573358243, "compression/movement_sparsity/model_sparsity": 0.8816021887013871, "compression_loss": 155.858642578125, "distillation_loss": 5.2250237464904785, "epoch": 1.69, "learning_rate": 3.793623958459123e-05, "loss": 162.0871, "step": 1998, "task_loss": 2.960421562194824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4545937903157613, "compression/movement_sparsity/importance_threshold": -0.00013815346047599524, "compression/movement_sparsity/linear_layer_sparsity": 0.9131690936640506, "compression/movement_sparsity/model_sparsity": 0.8817989260599726, "compression_loss": 155.8968048095703, "distillation_loss": 6.497771263122559, "epoch": 1.69, "learning_rate": 3.7930201666465405e-05, "loss": 162.3331, "step": 1999, "task_loss": 3.3142573833465576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4549619469725386, "compression/movement_sparsity/importance_threshold": -0.00013703330276001926, "compression/movement_sparsity/linear_layer_sparsity": 0.9133682868844077, "compression/movement_sparsity/model_sparsity": 0.8819912763804204, "compression_loss": 155.93484497070312, "distillation_loss": 6.367579460144043, "epoch": 1.69, "learning_rate": 3.792416374833957e-05, "loss": 162.4195, "step": 2000, "task_loss": 2.366534948348999 }, { "epoch": 1.69, "eval_accuracy": 0.3372673267326733, "eval_loss": 162.77630615234375, "eval_runtime": 310.0412, "eval_samples_per_second": 81.441, "eval_steps_per_second": 0.639, "step": 2000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.45532810820518, "compression/movement_sparsity/importance_threshold": -0.00013591921634467218, "compression/movement_sparsity/linear_layer_sparsity": 0.913518257140764, "compression/movement_sparsity/model_sparsity": 0.8821360946971083, "compression_loss": 155.97264099121094, "distillation_loss": 7.12130069732666, "epoch": 1.69, "learning_rate": 3.7918125830213746e-05, "loss": 163.1065, "step": 2001, "task_loss": 3.3076558113098145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.455692279436034, "compression/movement_sparsity/importance_threshold": -0.00013481118473185685, "compression/movement_sparsity/linear_layer_sparsity": 0.9137163771860338, "compression/movement_sparsity/model_sparsity": 0.8823274087093347, "compression_loss": 156.01026916503906, "distillation_loss": 7.194187641143799, "epoch": 1.69, "learning_rate": 3.7912087912087914e-05, "loss": 162.1262, "step": 2002, "task_loss": 3.8227150440216064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4560544660874486, "compression/movement_sparsity/importance_threshold": -0.00013370919142347264, "compression/movement_sparsity/linear_layer_sparsity": 0.9139006175001755, "compression/movement_sparsity/model_sparsity": 0.8825053198018961, "compression_loss": 156.047607421875, "distillation_loss": 6.145810604095459, "epoch": 1.69, "learning_rate": 3.790604999396208e-05, "loss": 162.3746, "step": 2003, "task_loss": 3.821897506713867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4564146735817722, "compression/movement_sparsity/importance_threshold": -0.00013261321992141892, "compression/movement_sparsity/linear_layer_sparsity": 0.9140687959605117, "compression/movement_sparsity/model_sparsity": 0.8826677208147423, "compression_loss": 156.08482360839844, "distillation_loss": 5.829546928405762, "epoch": 1.69, "learning_rate": 3.7900012075836255e-05, "loss": 162.0737, "step": 2004, "task_loss": 3.414478063583374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4567729073413525, "compression/movement_sparsity/importance_threshold": -0.00013152325372759854, "compression/movement_sparsity/linear_layer_sparsity": 0.9143393553241694, "compression/movement_sparsity/model_sparsity": 0.8829289856319205, "compression_loss": 156.12173461914062, "distillation_loss": 8.78770637512207, "epoch": 1.69, "learning_rate": 3.789397415771042e-05, "loss": 163.4461, "step": 2005, "task_loss": 4.179129123687744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4571291727885378, "compression/movement_sparsity/importance_threshold": -0.00013043927634391085, "compression/movement_sparsity/linear_layer_sparsity": 0.9144861656761022, "compression/movement_sparsity/model_sparsity": 0.8830707525966228, "compression_loss": 156.158447265625, "distillation_loss": 8.001663208007812, "epoch": 1.7, "learning_rate": 3.7887936239584596e-05, "loss": 163.169, "step": 2006, "task_loss": 4.638307094573975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.457483475345677, "compression/movement_sparsity/importance_threshold": -0.0001293612712722561, "compression/movement_sparsity/linear_layer_sparsity": 0.9146587322301284, "compression/movement_sparsity/model_sparsity": 0.8832373909586414, "compression_loss": 156.19508361816406, "distillation_loss": 8.28236198425293, "epoch": 1.7, "learning_rate": 3.788189832145876e-05, "loss": 162.8239, "step": 2007, "task_loss": 3.3667547702789307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4578358204351174, "compression/movement_sparsity/importance_threshold": -0.00012828922201453454, "compression/movement_sparsity/linear_layer_sparsity": 0.9149671746743653, "compression/movement_sparsity/model_sparsity": 0.8835352374560388, "compression_loss": 156.2313232421875, "distillation_loss": 6.896763801574707, "epoch": 1.7, "learning_rate": 3.787586040333293e-05, "loss": 162.4879, "step": 2008, "task_loss": 4.932947635650635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4581862134792076, "compression/movement_sparsity/importance_threshold": -0.00012722311207264726, "compression/movement_sparsity/linear_layer_sparsity": 0.9151260522839455, "compression/movement_sparsity/model_sparsity": 0.8836886571309652, "compression_loss": 156.26756286621094, "distillation_loss": 5.693926811218262, "epoch": 1.7, "learning_rate": 3.7869822485207104e-05, "loss": 162.8252, "step": 2009, "task_loss": 4.165630340576172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.458534659900296, "compression/movement_sparsity/importance_threshold": -0.0001261629249484945, "compression/movement_sparsity/linear_layer_sparsity": 0.9153460531768272, "compression/movement_sparsity/model_sparsity": 0.8839011003163746, "compression_loss": 156.30340576171875, "distillation_loss": 5.526670932769775, "epoch": 1.7, "learning_rate": 3.786378456708127e-05, "loss": 162.3079, "step": 2010, "task_loss": 3.1792430877685547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4588811651207303, "compression/movement_sparsity/importance_threshold": -0.00012510864414397652, "compression/movement_sparsity/linear_layer_sparsity": 0.9154999941810062, "compression/movement_sparsity/model_sparsity": 0.8840497529734821, "compression_loss": 156.33914184570312, "distillation_loss": 6.163922309875488, "epoch": 1.7, "learning_rate": 3.7857746648955445e-05, "loss": 162.1426, "step": 2011, "task_loss": 2.2184555530548096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.459225734562859, "compression/movement_sparsity/importance_threshold": -0.00012406025316099353, "compression/movement_sparsity/linear_layer_sparsity": 0.9157316330615005, "compression/movement_sparsity/model_sparsity": 0.8842734343458268, "compression_loss": 156.37466430664062, "distillation_loss": 6.6913604736328125, "epoch": 1.7, "learning_rate": 3.785170873082961e-05, "loss": 163.211, "step": 2012, "task_loss": 2.4694747924804688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4595683736490304, "compression/movement_sparsity/importance_threshold": -0.00012301773550144664, "compression/movement_sparsity/linear_layer_sparsity": 0.9159080272733378, "compression/movement_sparsity/model_sparsity": 0.8844437688738355, "compression_loss": 156.40988159179688, "distillation_loss": 7.296276092529297, "epoch": 1.7, "learning_rate": 3.784567081270378e-05, "loss": 162.1593, "step": 2013, "task_loss": 2.879027843475342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4599090878015926, "compression/movement_sparsity/importance_threshold": -0.00012198107466723611, "compression/movement_sparsity/linear_layer_sparsity": 0.9159969696397336, "compression/movement_sparsity/model_sparsity": 0.8845296557963269, "compression_loss": 156.44503784179688, "distillation_loss": 6.6902947425842285, "epoch": 1.7, "learning_rate": 3.7839632894577954e-05, "loss": 162.1855, "step": 2014, "task_loss": 2.551546335220337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4602478824428933, "compression/movement_sparsity/importance_threshold": -0.00012095025416026303, "compression/movement_sparsity/linear_layer_sparsity": 0.9162229803131038, "compression/movement_sparsity/model_sparsity": 0.8847479023077767, "compression_loss": 156.4798583984375, "distillation_loss": 6.725427627563477, "epoch": 1.7, "learning_rate": 3.783359497645212e-05, "loss": 162.7022, "step": 2015, "task_loss": 2.828561544418335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4605847629952815, "compression/movement_sparsity/importance_threshold": -0.00011992525748242677, "compression/movement_sparsity/linear_layer_sparsity": 0.9163561136447582, "compression/movement_sparsity/model_sparsity": 0.8848764620999227, "compression_loss": 156.5145721435547, "distillation_loss": 7.439986228942871, "epoch": 1.7, "learning_rate": 3.782755705832629e-05, "loss": 163.9552, "step": 2016, "task_loss": 4.732082843780518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4609197348811052, "compression/movement_sparsity/importance_threshold": -0.00011890606813562757, "compression/movement_sparsity/linear_layer_sparsity": 0.9165757091159403, "compression/movement_sparsity/model_sparsity": 0.885088513791115, "compression_loss": 156.54904174804688, "distillation_loss": 6.835080623626709, "epoch": 1.7, "learning_rate": 3.782151914020046e-05, "loss": 163.0522, "step": 2017, "task_loss": 4.46486234664917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4612528035227121, "compression/movement_sparsity/importance_threshold": -0.00011789266962176741, "compression/movement_sparsity/linear_layer_sparsity": 0.9167404772643326, "compression/movement_sparsity/model_sparsity": 0.8852476216467239, "compression_loss": 156.58331298828125, "distillation_loss": 7.157663822174072, "epoch": 1.71, "learning_rate": 3.781548122207463e-05, "loss": 163.2967, "step": 2018, "task_loss": 3.536332130432129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4615839743424508, "compression/movement_sparsity/importance_threshold": -0.00011688504544274479, "compression/movement_sparsity/linear_layer_sparsity": 0.9169146297326545, "compression/movement_sparsity/model_sparsity": 0.8854157914420032, "compression_loss": 156.61752319335938, "distillation_loss": 7.78938627243042, "epoch": 1.71, "learning_rate": 3.7809443303948796e-05, "loss": 163.1402, "step": 2019, "task_loss": 4.093336582183838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4619132527626695, "compression/movement_sparsity/importance_threshold": -0.00011588317910046254, "compression/movement_sparsity/linear_layer_sparsity": 0.9170125748456154, "compression/movement_sparsity/model_sparsity": 0.8855103718390196, "compression_loss": 156.65145874023438, "distillation_loss": 7.758843421936035, "epoch": 1.71, "learning_rate": 3.780340538582297e-05, "loss": 163.636, "step": 2020, "task_loss": 3.7144312858581543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4622406442057163, "compression/movement_sparsity/importance_threshold": -0.00011488705409681919, "compression/movement_sparsity/linear_layer_sparsity": 0.917101755695364, "compression/movement_sparsity/model_sparsity": 0.8855964890522269, "compression_loss": 156.68531799316406, "distillation_loss": 6.799404144287109, "epoch": 1.71, "learning_rate": 3.7797367467697144e-05, "loss": 163.1987, "step": 2021, "task_loss": 3.8739829063415527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4625661540939394, "compression/movement_sparsity/importance_threshold": -0.00011389665393371582, "compression/movement_sparsity/linear_layer_sparsity": 0.9172955949644527, "compression/movement_sparsity/model_sparsity": 0.885783669346103, "compression_loss": 156.71885681152344, "distillation_loss": 7.377967834472656, "epoch": 1.71, "learning_rate": 3.779132954957131e-05, "loss": 163.2695, "step": 2022, "task_loss": 4.174900054931641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.462889787849687, "compression/movement_sparsity/importance_threshold": -0.00011291196211305268, "compression/movement_sparsity/linear_layer_sparsity": 0.9174443966523805, "compression/movement_sparsity/model_sparsity": 0.885927359238283, "compression_loss": 156.75222778320312, "distillation_loss": 6.577198028564453, "epoch": 1.71, "learning_rate": 3.778529163144548e-05, "loss": 162.9832, "step": 2023, "task_loss": 3.614705801010132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4632115508953074, "compression/movement_sparsity/importance_threshold": -0.00011193296213673088, "compression/movement_sparsity/linear_layer_sparsity": 0.9175784839174458, "compression/movement_sparsity/model_sparsity": 0.8860568401932927, "compression_loss": 156.78543090820312, "distillation_loss": 8.962401390075684, "epoch": 1.71, "learning_rate": 3.777925371331965e-05, "loss": 163.4442, "step": 2024, "task_loss": 4.668178558349609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4635314486531485, "compression/movement_sparsity/importance_threshold": -0.00011095963750664978, "compression/movement_sparsity/linear_layer_sparsity": 0.9177568933136137, "compression/movement_sparsity/model_sparsity": 0.8862291206778505, "compression_loss": 156.81838989257812, "distillation_loss": 7.469170093536377, "epoch": 1.71, "learning_rate": 3.777321579519382e-05, "loss": 163.1041, "step": 2025, "task_loss": 3.1402318477630615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4638494865455587, "compression/movement_sparsity/importance_threshold": -0.00010999197172471137, "compression/movement_sparsity/linear_layer_sparsity": 0.9179160690273848, "compression/movement_sparsity/model_sparsity": 0.8863828282161719, "compression_loss": 156.85110473632812, "distillation_loss": 5.778111934661865, "epoch": 1.71, "learning_rate": 3.776717787706799e-05, "loss": 162.3084, "step": 2026, "task_loss": 3.376955986022949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4641656699948862, "compression/movement_sparsity/importance_threshold": -0.000109029948292815, "compression/movement_sparsity/linear_layer_sparsity": 0.9180500370507737, "compression/movement_sparsity/model_sparsity": 0.8865121940258236, "compression_loss": 156.8836669921875, "distillation_loss": 7.7392578125, "epoch": 1.71, "learning_rate": 3.776113995894216e-05, "loss": 163.0562, "step": 2027, "task_loss": 4.390015125274658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4644800044234794, "compression/movement_sparsity/importance_threshold": -0.0001080735507128618, "compression/movement_sparsity/linear_layer_sparsity": 0.9182072452768849, "compression/movement_sparsity/model_sparsity": 0.8866640016657388, "compression_loss": 156.9159393310547, "distillation_loss": 8.972311973571777, "epoch": 1.71, "learning_rate": 3.775510204081633e-05, "loss": 163.8257, "step": 2028, "task_loss": 3.9244768619537354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4647924952536862, "compression/movement_sparsity/importance_threshold": -0.00010712276248675111, "compression/movement_sparsity/linear_layer_sparsity": 0.9183535548137769, "compression/movement_sparsity/model_sparsity": 0.8868052850199378, "compression_loss": 156.9480743408203, "distillation_loss": 6.731687545776367, "epoch": 1.71, "learning_rate": 3.7749064122690495e-05, "loss": 163.0141, "step": 2029, "task_loss": 3.1125693321228027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4651031479078545, "compression/movement_sparsity/importance_threshold": -0.00010617756711638494, "compression/movement_sparsity/linear_layer_sparsity": 0.9184603238107885, "compression/movement_sparsity/model_sparsity": 0.8869083861734421, "compression_loss": 156.97998046875, "distillation_loss": 7.222890853881836, "epoch": 1.72, "learning_rate": 3.774302620456467e-05, "loss": 163.2517, "step": 2030, "task_loss": 3.2845473289489746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4654119678083333, "compression/movement_sparsity/importance_threshold": -0.00010523794810366176, "compression/movement_sparsity/linear_layer_sparsity": 0.9186254616083775, "compression/movement_sparsity/model_sparsity": 0.8870678509796608, "compression_loss": 157.01168823242188, "distillation_loss": 6.474638938903809, "epoch": 1.72, "learning_rate": 3.773698828643884e-05, "loss": 163.4867, "step": 2031, "task_loss": 3.074014663696289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4657189603774703, "compression/movement_sparsity/importance_threshold": -0.00010430388895048357, "compression/movement_sparsity/linear_layer_sparsity": 0.918727973677543, "compression/movement_sparsity/model_sparsity": 0.8871668414438864, "compression_loss": 157.04327392578125, "distillation_loss": 6.977015495300293, "epoch": 1.72, "learning_rate": 3.7730950368313004e-05, "loss": 163.6655, "step": 2032, "task_loss": 3.273977756500244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4660241310376136, "compression/movement_sparsity/importance_threshold": -0.0001033753731587506, "compression/movement_sparsity/linear_layer_sparsity": 0.91881262334359, "compression/movement_sparsity/model_sparsity": 0.8872485831334919, "compression_loss": 157.07443237304688, "distillation_loss": 8.870085716247559, "epoch": 1.72, "learning_rate": 3.772491245018718e-05, "loss": 163.3332, "step": 2033, "task_loss": 4.21150016784668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4663274852111117, "compression/movement_sparsity/importance_threshold": -0.00010245238423036394, "compression/movement_sparsity/linear_layer_sparsity": 0.9189579431745682, "compression/movement_sparsity/model_sparsity": 0.88738891078122, "compression_loss": 157.1055450439453, "distillation_loss": 6.959296226501465, "epoch": 1.72, "learning_rate": 3.771887453206135e-05, "loss": 163.528, "step": 2034, "task_loss": 3.035745620727539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4666290283203125, "compression/movement_sparsity/importance_threshold": -0.00010153490566722212, "compression/movement_sparsity/linear_layer_sparsity": 0.9191849793263551, "compression/movement_sparsity/model_sparsity": 0.887608147542748, "compression_loss": 157.13644409179688, "distillation_loss": 5.344354629516602, "epoch": 1.72, "learning_rate": 3.771283661393551e-05, "loss": 163.2162, "step": 2035, "task_loss": 2.107182502746582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4669287657875645, "compression/movement_sparsity/importance_threshold": -0.0001006229209712271, "compression/movement_sparsity/linear_layer_sparsity": 0.9193297387214545, "compression/movement_sparsity/model_sparsity": 0.8877479340072937, "compression_loss": 157.16722106933594, "distillation_loss": 6.175367832183838, "epoch": 1.72, "learning_rate": 3.7706798695809686e-05, "loss": 163.6404, "step": 2036, "task_loss": 3.0417799949645996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4672267030352155, "compression/movement_sparsity/importance_threshold": -9.971641364427911e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9194868754025599, "compression/movement_sparsity/model_sparsity": 0.8878996725599941, "compression_loss": 157.19764709472656, "distillation_loss": 7.253015518188477, "epoch": 1.72, "learning_rate": 3.770076077768386e-05, "loss": 164.3176, "step": 2037, "task_loss": 4.325547695159912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4675228454856142, "compression/movement_sparsity/importance_threshold": -9.881536718827841e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9195677451074663, "compression/movement_sparsity/model_sparsity": 0.8879777641417528, "compression_loss": 157.2279510498047, "distillation_loss": 7.390590667724609, "epoch": 1.72, "learning_rate": 3.769472285955802e-05, "loss": 164.2178, "step": 2038, "task_loss": 3.8896913528442383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4678171985611084, "compression/movement_sparsity/importance_threshold": -9.791976510512609e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9197364363070109, "compression/movement_sparsity/model_sparsity": 0.8881406602796381, "compression_loss": 157.25808715820312, "distillation_loss": 6.069460868835449, "epoch": 1.72, "learning_rate": 3.7688684941432194e-05, "loss": 163.5156, "step": 2039, "task_loss": 2.217817544937134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4681097676840464, "compression/movement_sparsity/importance_threshold": -9.702959089672065e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9199007871095359, "compression/movement_sparsity/model_sparsity": 0.8882993651264942, "compression_loss": 157.28807067871094, "distillation_loss": 6.967180252075195, "epoch": 1.72, "learning_rate": 3.768264702330637e-05, "loss": 164.5535, "step": 2040, "task_loss": 3.6561732292175293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4684005582767765, "compression/movement_sparsity/importance_threshold": -9.614482806496582e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9200337773511787, "compression/movement_sparsity/model_sparsity": 0.8884277867442107, "compression_loss": 157.31784057617188, "distillation_loss": 6.5490522384643555, "epoch": 1.72, "learning_rate": 3.7676609105180535e-05, "loss": 163.5856, "step": 2041, "task_loss": 3.040182113647461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4686895757616467, "compression/movement_sparsity/importance_threshold": -9.526546011175922e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.920130076929006, "compression/movement_sparsity/model_sparsity": 0.8885207781352875, "compression_loss": 157.34742736816406, "distillation_loss": 7.280935764312744, "epoch": 1.73, "learning_rate": 3.76705711870547e-05, "loss": 164.0972, "step": 2042, "task_loss": 4.1884846687316895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4689768255610054, "compression/movement_sparsity/importance_threshold": -9.439147053900195e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202608254271333, "compression/movement_sparsity/model_sparsity": 0.8886470350202746, "compression_loss": 157.37689208984375, "distillation_loss": 8.016792297363281, "epoch": 1.73, "learning_rate": 3.7664533268928877e-05, "loss": 164.4515, "step": 2043, "task_loss": 4.070981025695801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4692623130972007, "compression/movement_sparsity/importance_threshold": -9.3522842848596e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.920360117971037, "compression/movement_sparsity/model_sparsity": 0.8887429165598357, "compression_loss": 157.4061737060547, "distillation_loss": 7.001385688781738, "epoch": 1.73, "learning_rate": 3.7658495350803044e-05, "loss": 163.9293, "step": 2044, "task_loss": 2.102085828781128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4695460437925807, "compression/movement_sparsity/importance_threshold": -9.2659560542439e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9205359278986602, "compression/movement_sparsity/model_sparsity": 0.8889126868755904, "compression_loss": 157.43533325195312, "distillation_loss": 6.396998405456543, "epoch": 1.73, "learning_rate": 3.765245743267721e-05, "loss": 164.1069, "step": 2045, "task_loss": 2.826390504837036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4698280230694938, "compression/movement_sparsity/importance_threshold": -9.180160712243465e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9206228670048933, "compression/movement_sparsity/model_sparsity": 0.8889966393560684, "compression_loss": 157.4640655517578, "distillation_loss": 6.79403018951416, "epoch": 1.73, "learning_rate": 3.7646419514551385e-05, "loss": 164.2619, "step": 2046, "task_loss": 2.982492685317993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4701082563502879, "compression/movement_sparsity/importance_threshold": -9.094896609048147e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9207448989364787, "compression/movement_sparsity/model_sparsity": 0.8891144791153898, "compression_loss": 157.49285888671875, "distillation_loss": 6.070186614990234, "epoch": 1.73, "learning_rate": 3.764038159642556e-05, "loss": 163.9935, "step": 2047, "task_loss": 2.507523775100708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4703867490573115, "compression/movement_sparsity/importance_threshold": -9.010162094847968e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9209042296644291, "compression/movement_sparsity/model_sparsity": 0.8892683363426764, "compression_loss": 157.521484375, "distillation_loss": 7.141257286071777, "epoch": 1.73, "learning_rate": 3.763434367829972e-05, "loss": 164.0591, "step": 2048, "task_loss": 3.3685402870178223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4706635066129126, "compression/movement_sparsity/importance_threshold": -8.92595551983304e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211059269599897, "compression/movement_sparsity/model_sparsity": 0.8894631047156412, "compression_loss": 157.54974365234375, "distillation_loss": 6.28179407119751, "epoch": 1.73, "learning_rate": 3.762830576017389e-05, "loss": 163.2027, "step": 2049, "task_loss": 3.0805306434631348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4709385344394397, "compression/movement_sparsity/importance_threshold": -8.842275234193474e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9212482776732267, "compression/movement_sparsity/model_sparsity": 0.8896005652439564, "compression_loss": 157.57791137695312, "distillation_loss": 8.601505279541016, "epoch": 1.73, "learning_rate": 3.762226784204807e-05, "loss": 164.8551, "step": 2050, "task_loss": 3.607563018798828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4712118379592405, "compression/movement_sparsity/importance_threshold": -8.759119588119205e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9214339488874846, "compression/movement_sparsity/model_sparsity": 0.8897798580808132, "compression_loss": 157.60595703125, "distillation_loss": 6.859668254852295, "epoch": 1.73, "learning_rate": 3.7616229923922234e-05, "loss": 164.269, "step": 2051, "task_loss": 2.931633949279785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4714834225946638, "compression/movement_sparsity/importance_threshold": -8.676486931800259e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9215043253248716, "compression/movement_sparsity/model_sparsity": 0.8898478168710725, "compression_loss": 157.63377380371094, "distillation_loss": 6.47484016418457, "epoch": 1.73, "learning_rate": 3.76101920057964e-05, "loss": 163.8806, "step": 2052, "task_loss": 2.7673532962799072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.471753293768057, "compression/movement_sparsity/importance_threshold": -8.594375615426832e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9216246401763174, "compression/movement_sparsity/model_sparsity": 0.8899639985372394, "compression_loss": 157.66146850585938, "distillation_loss": 7.326955795288086, "epoch": 1.73, "learning_rate": 3.7604154087670576e-05, "loss": 164.5384, "step": 2053, "task_loss": 3.176683187484741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.472021456901769, "compression/movement_sparsity/importance_threshold": -8.512783989188862e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9217608380190543, "compression/movement_sparsity/model_sparsity": 0.8900955175650846, "compression_loss": 157.6890106201172, "distillation_loss": 5.722264766693115, "epoch": 1.74, "learning_rate": 3.759811616954474e-05, "loss": 164.0584, "step": 2054, "task_loss": 2.232388734817505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4722879174181478, "compression/movement_sparsity/importance_threshold": -8.431710403276373e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9219039638031875, "compression/movement_sparsity/model_sparsity": 0.8902337265382265, "compression_loss": 157.71612548828125, "distillation_loss": 5.818230628967285, "epoch": 1.74, "learning_rate": 3.759207825141891e-05, "loss": 164.3617, "step": 2055, "task_loss": 2.535738945007324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4725526807395415, "compression/movement_sparsity/importance_threshold": -8.351153207879474e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9219865386640659, "compression/movement_sparsity/model_sparsity": 0.8903134646986037, "compression_loss": 157.7432861328125, "distillation_loss": 6.892061710357666, "epoch": 1.74, "learning_rate": 3.7586040333293084e-05, "loss": 164.2182, "step": 2056, "task_loss": 3.531475067138672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4728157522882985, "compression/movement_sparsity/importance_threshold": -8.27111075318819e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9221089760173509, "compression/movement_sparsity/model_sparsity": 0.8904316959521421, "compression_loss": 157.77020263671875, "distillation_loss": 6.085649490356445, "epoch": 1.74, "learning_rate": 3.758000241516725e-05, "loss": 163.9001, "step": 2057, "task_loss": 3.5105717182159424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4730771374867666, "compression/movement_sparsity/importance_threshold": -8.191581389392546e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.92223900906542, "compression/movement_sparsity/model_sparsity": 0.8905572619649815, "compression_loss": 157.7969970703125, "distillation_loss": 6.869007110595703, "epoch": 1.74, "learning_rate": 3.757396449704142e-05, "loss": 164.4945, "step": 2058, "task_loss": 3.155095100402832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4733368417572943, "compression/movement_sparsity/importance_threshold": -8.11256346668265e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9223908394919275, "compression/movement_sparsity/model_sparsity": 0.8907038765492534, "compression_loss": 157.82359313964844, "distillation_loss": 6.042092800140381, "epoch": 1.74, "learning_rate": 3.756792657891559e-05, "loss": 163.8759, "step": 2059, "task_loss": 3.681732177734375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4735948705222297, "compression/movement_sparsity/importance_threshold": -8.034055335248442e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9225007803175301, "compression/movement_sparsity/model_sparsity": 0.8908100405692791, "compression_loss": 157.85003662109375, "distillation_loss": 6.541399955749512, "epoch": 1.74, "learning_rate": 3.756188866078976e-05, "loss": 164.6581, "step": 2060, "task_loss": 2.393188953399658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.473851229203921, "compression/movement_sparsity/importance_threshold": -7.956055345280117e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9225234243118706, "compression/movement_sparsity/model_sparsity": 0.8908319066727529, "compression_loss": 157.87637329101562, "distillation_loss": 5.936201572418213, "epoch": 1.74, "learning_rate": 3.755585074266393e-05, "loss": 163.7279, "step": 2061, "task_loss": 3.381924867630005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4741059232247165, "compression/movement_sparsity/importance_threshold": -7.878561846967613e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9225495263148255, "compression/movement_sparsity/model_sparsity": 0.8908571119916072, "compression_loss": 157.90243530273438, "distillation_loss": 6.148585319519043, "epoch": 1.74, "learning_rate": 3.75498128245381e-05, "loss": 164.0779, "step": 2062, "task_loss": 3.1262643337249756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4743589580069643, "compression/movement_sparsity/importance_threshold": -7.801573190500954e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9226355234118153, "compression/movement_sparsity/model_sparsity": 0.8909401548237574, "compression_loss": 157.92831420898438, "distillation_loss": 6.561042785644531, "epoch": 1.74, "learning_rate": 3.7543774906412274e-05, "loss": 164.9616, "step": 2063, "task_loss": 3.166872501373291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4746103389730125, "compression/movement_sparsity/importance_threshold": -7.725087726070251e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9227817017828633, "compression/movement_sparsity/model_sparsity": 0.8910813115180627, "compression_loss": 157.95407104492188, "distillation_loss": 8.221183776855469, "epoch": 1.74, "learning_rate": 3.753773698828644e-05, "loss": 164.642, "step": 2064, "task_loss": 3.5372185707092285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4748600715452094, "compression/movement_sparsity/importance_threshold": -7.649103803865527e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9228398678725911, "compression/movement_sparsity/model_sparsity": 0.8911374794236685, "compression_loss": 157.97955322265625, "distillation_loss": 6.502510070800781, "epoch": 1.75, "learning_rate": 3.753169907016061e-05, "loss": 164.1097, "step": 2065, "task_loss": 2.5702710151672363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4751081611459034, "compression/movement_sparsity/importance_threshold": -7.573619774076893e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9230080821054303, "compression/movement_sparsity/model_sparsity": 0.8912999149801221, "compression_loss": 158.0048828125, "distillation_loss": 7.297700881958008, "epoch": 1.75, "learning_rate": 3.752566115203478e-05, "loss": 164.8532, "step": 2066, "task_loss": 2.9602088928222656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.475354613197442, "compression/movement_sparsity/importance_threshold": -7.498633986894286e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9232413307485554, "compression/movement_sparsity/model_sparsity": 0.8915251508147991, "compression_loss": 158.02993774414062, "distillation_loss": 7.440400123596191, "epoch": 1.75, "learning_rate": 3.751962323390895e-05, "loss": 164.6024, "step": 2067, "task_loss": 4.1229143142700195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4755994331221745, "compression/movement_sparsity/importance_threshold": -7.424144792507817e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9233652466986273, "compression/movement_sparsity/model_sparsity": 0.891644809870776, "compression_loss": 158.0548553466797, "distillation_loss": 7.882330894470215, "epoch": 1.75, "learning_rate": 3.751358531578312e-05, "loss": 165.3719, "step": 2068, "task_loss": 3.4940640926361084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4758426263424482, "compression/movement_sparsity/importance_threshold": -7.350150541107422e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9234522335015308, "compression/movement_sparsity/model_sparsity": 0.8917288084093971, "compression_loss": 158.0797119140625, "distillation_loss": 9.107924461364746, "epoch": 1.75, "learning_rate": 3.750754739765729e-05, "loss": 165.4019, "step": 2069, "task_loss": 4.153846263885498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4760841982806114, "compression/movement_sparsity/importance_threshold": -7.276649582883387e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9236599167292447, "compression/movement_sparsity/model_sparsity": 0.8919293570793306, "compression_loss": 158.10440063476562, "distillation_loss": 6.349693298339844, "epoch": 1.75, "learning_rate": 3.750150947953146e-05, "loss": 164.0524, "step": 2070, "task_loss": 2.878816843032837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4763241543590127, "compression/movement_sparsity/importance_threshold": -7.203640268025474e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9237574444963385, "compression/movement_sparsity/model_sparsity": 0.8920235344675942, "compression_loss": 158.1288299560547, "distillation_loss": 5.780635356903076, "epoch": 1.75, "learning_rate": 3.749547156140563e-05, "loss": 163.8705, "step": 2071, "task_loss": 2.8883330821990967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4765625, "compression/movement_sparsity/importance_threshold": -7.131120946723968e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9237529252368044, "compression/movement_sparsity/model_sparsity": 0.8920191704585281, "compression_loss": 158.15306091308594, "distillation_loss": 7.023695468902588, "epoch": 1.75, "learning_rate": 3.74894336432798e-05, "loss": 164.5476, "step": 2072, "task_loss": 2.204735517501831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4767992406259216, "compression/movement_sparsity/importance_threshold": -7.059089969168805e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9238106978290002, "compression/movement_sparsity/model_sparsity": 0.8920749583844526, "compression_loss": 158.17724609375, "distillation_loss": 7.226841926574707, "epoch": 1.75, "learning_rate": 3.748339572515397e-05, "loss": 164.8711, "step": 2073, "task_loss": 3.5587823390960693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4770343816591256, "compression/movement_sparsity/importance_threshold": -6.98754568555001e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9239283416668956, "compression/movement_sparsity/model_sparsity": 0.8921885607946015, "compression_loss": 158.2010955810547, "distillation_loss": 7.537065505981445, "epoch": 1.75, "learning_rate": 3.747735780702814e-05, "loss": 164.161, "step": 2074, "task_loss": 2.887624740600586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.47726792852196, "compression/movement_sparsity/importance_threshold": -6.916486446057607e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9240291128075863, "compression/movement_sparsity/model_sparsity": 0.8922858701366012, "compression_loss": 158.22486877441406, "distillation_loss": 6.06566047668457, "epoch": 1.75, "learning_rate": 3.747131988890231e-05, "loss": 164.3281, "step": 2075, "task_loss": 3.465578317642212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4774998866367737, "compression/movement_sparsity/importance_threshold": -6.845910600881792e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9241099109674868, "compression/movement_sparsity/model_sparsity": 0.892363892631145, "compression_loss": 158.24842834472656, "distillation_loss": 5.879219055175781, "epoch": 1.75, "learning_rate": 3.7465281970776475e-05, "loss": 164.9828, "step": 2076, "task_loss": 2.490217924118042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4777302614259142, "compression/movement_sparsity/importance_threshold": -6.775816500212504e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9243039410232576, "compression/movement_sparsity/model_sparsity": 0.8925512571575938, "compression_loss": 158.27174377441406, "distillation_loss": 8.313295364379883, "epoch": 1.76, "learning_rate": 3.745924405265065e-05, "loss": 165.4238, "step": 2077, "task_loss": 3.354480504989624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.47795905831173, "compression/movement_sparsity/importance_threshold": -6.706202494239765e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.924402005377895, "compression/movement_sparsity/model_sparsity": 0.892645952699968, "compression_loss": 158.2950897216797, "distillation_loss": 6.969394683837891, "epoch": 1.76, "learning_rate": 3.7453206134524816e-05, "loss": 164.8258, "step": 2078, "task_loss": 2.808372735977173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4781862827165693, "compression/movement_sparsity/importance_threshold": -6.6370669331536e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9245530726576737, "compression/movement_sparsity/model_sparsity": 0.8927918303539492, "compression_loss": 158.3180694580078, "distillation_loss": 5.652746200561523, "epoch": 1.76, "learning_rate": 3.744716821639899e-05, "loss": 164.6437, "step": 2079, "task_loss": 1.7458292245864868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.47841194006278, "compression/movement_sparsity/importance_threshold": -6.568408167144206e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9246555489543363, "compression/movement_sparsity/model_sparsity": 0.8928907862745674, "compression_loss": 158.34088134765625, "distillation_loss": 6.885915756225586, "epoch": 1.76, "learning_rate": 3.744113029827316e-05, "loss": 165.0661, "step": 2080, "task_loss": 2.5404088497161865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4786360357727106, "compression/movement_sparsity/importance_threshold": -6.500224546401433e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9248361166248462, "compression/movement_sparsity/model_sparsity": 0.8930651508901041, "compression_loss": 158.3636474609375, "distillation_loss": 6.806939125061035, "epoch": 1.76, "learning_rate": 3.743509238014733e-05, "loss": 164.6045, "step": 2081, "task_loss": 2.705115556716919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4788585752687093, "compression/movement_sparsity/importance_threshold": -6.43251442111548e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9249612011433465, "compression/movement_sparsity/model_sparsity": 0.8931859383705889, "compression_loss": 158.38613891601562, "distillation_loss": 7.433000564575195, "epoch": 1.76, "learning_rate": 3.74290544620215e-05, "loss": 164.6337, "step": 2082, "task_loss": 3.7706639766693115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4790795639731242, "compression/movement_sparsity/importance_threshold": -6.365276141476368e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9250869414910666, "compression/movement_sparsity/model_sparsity": 0.8933073591505423, "compression_loss": 158.40843200683594, "distillation_loss": 7.363675117492676, "epoch": 1.76, "learning_rate": 3.7423016543895666e-05, "loss": 165.2568, "step": 2083, "task_loss": 3.91349458694458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4792990073083037, "compression/movement_sparsity/importance_threshold": -6.298508057674037e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9251610263445882, "compression/movement_sparsity/model_sparsity": 0.8933788989614337, "compression_loss": 158.43051147460938, "distillation_loss": 7.433717727661133, "epoch": 1.76, "learning_rate": 3.741697862576984e-05, "loss": 165.9625, "step": 2084, "task_loss": 3.17760968208313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4795169106965955, "compression/movement_sparsity/importance_threshold": -6.232208519898683e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9252880783507483, "compression/movement_sparsity/model_sparsity": 0.8935015863403246, "compression_loss": 158.45254516601562, "distillation_loss": 6.165663719177246, "epoch": 1.76, "learning_rate": 3.741094070764401e-05, "loss": 164.7903, "step": 2085, "task_loss": 4.272960662841797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4797332795603482, "compression/movement_sparsity/importance_threshold": -6.166375878340329e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9254304648364882, "compression/movement_sparsity/model_sparsity": 0.8936390814122472, "compression_loss": 158.4743194580078, "distillation_loss": 6.613044738769531, "epoch": 1.76, "learning_rate": 3.7404902789518174e-05, "loss": 165.0626, "step": 2086, "task_loss": 2.774812698364258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4799481193219097, "compression/movement_sparsity/importance_threshold": -6.101008483188827e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9254707685230974, "compression/movement_sparsity/model_sparsity": 0.8936780005432328, "compression_loss": 158.49598693847656, "distillation_loss": 7.976287841796875, "epoch": 1.76, "learning_rate": 3.739886487139235e-05, "loss": 165.0246, "step": 2087, "task_loss": 2.2189197540283203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4801614354036285, "compression/movement_sparsity/importance_threshold": -6.036104684634461e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9254908965180667, "compression/movement_sparsity/model_sparsity": 0.893697437079654, "compression_loss": 158.5174560546875, "distillation_loss": 6.275365829467773, "epoch": 1.76, "learning_rate": 3.7392826953266515e-05, "loss": 164.8943, "step": 2088, "task_loss": 2.8189098834991455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.480373233227853, "compression/movement_sparsity/importance_threshold": -5.971662832867167e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9255871722475587, "compression/movement_sparsity/model_sparsity": 0.8937904054416591, "compression_loss": 158.53890991210938, "distillation_loss": 8.053070068359375, "epoch": 1.77, "learning_rate": 3.738678903514068e-05, "loss": 165.6037, "step": 2089, "task_loss": 3.9769766330718994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4805835182169307, "compression/movement_sparsity/importance_threshold": -5.907681278077056e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9257199597783516, "compression/movement_sparsity/model_sparsity": 0.8939186313122671, "compression_loss": 158.55996704101562, "distillation_loss": 7.240005016326904, "epoch": 1.77, "learning_rate": 3.7380751117014856e-05, "loss": 165.9497, "step": 2090, "task_loss": 3.2059333324432373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4807922957932103, "compression/movement_sparsity/importance_threshold": -5.844158370454065e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.925776957299651, "compression/movement_sparsity/model_sparsity": 0.893973670793365, "compression_loss": 158.5808563232422, "distillation_loss": 10.765399932861328, "epoch": 1.77, "learning_rate": 3.737471319888903e-05, "loss": 166.35, "step": 2091, "task_loss": 4.414034843444824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4809995713790398, "compression/movement_sparsity/importance_threshold": -5.781092460188306e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9258721956265588, "compression/movement_sparsity/model_sparsity": 0.894065637390756, "compression_loss": 158.60189819335938, "distillation_loss": 6.704811096191406, "epoch": 1.77, "learning_rate": 3.736867528076319e-05, "loss": 165.655, "step": 2092, "task_loss": 3.7092132568359375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4812053503967673, "compression/movement_sparsity/importance_threshold": -5.7184818974698876e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9260008573953497, "compression/movement_sparsity/model_sparsity": 0.8941898792319791, "compression_loss": 158.62269592285156, "distillation_loss": 6.135540008544922, "epoch": 1.77, "learning_rate": 3.7362637362637365e-05, "loss": 164.2481, "step": 2093, "task_loss": 2.849139928817749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4814096382687416, "compression/movement_sparsity/importance_threshold": -5.656325032488661e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9261704667558022, "compression/movement_sparsity/model_sparsity": 0.8943536619891206, "compression_loss": 158.64329528808594, "distillation_loss": 5.725130081176758, "epoch": 1.77, "learning_rate": 3.735659944451154e-05, "loss": 164.6533, "step": 2094, "task_loss": 2.984590530395508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.48161244041731, "compression/movement_sparsity/importance_threshold": -5.594620215434824e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9262539836259238, "compression/movement_sparsity/model_sparsity": 0.8944343097978256, "compression_loss": 158.66375732421875, "distillation_loss": 5.930133819580078, "epoch": 1.77, "learning_rate": 3.7350561526385706e-05, "loss": 164.586, "step": 2095, "task_loss": 3.3640546798706055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4818137622648213, "compression/movement_sparsity/importance_threshold": -5.533365796498487e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.926468857126722, "compression/movement_sparsity/model_sparsity": 0.8946418017328432, "compression_loss": 158.68405151367188, "distillation_loss": 6.960702896118164, "epoch": 1.77, "learning_rate": 3.734452360825987e-05, "loss": 164.3518, "step": 2096, "task_loss": 2.9138264656066895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4820136092336238, "compression/movement_sparsity/importance_threshold": -5.472560125869587e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9265660748654573, "compression/movement_sparsity/model_sparsity": 0.8947356797431761, "compression_loss": 158.7042236328125, "distillation_loss": 6.667351722717285, "epoch": 1.77, "learning_rate": 3.733848569013405e-05, "loss": 164.567, "step": 2097, "task_loss": 3.5075442790985107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4822119867460652, "compression/movement_sparsity/importance_threshold": -5.4122015537381475e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9266823354999069, "compression/movement_sparsity/model_sparsity": 0.8948479464671729, "compression_loss": 158.7241668701172, "distillation_loss": 4.760207176208496, "epoch": 1.77, "learning_rate": 3.7332447772008214e-05, "loss": 164.973, "step": 2098, "task_loss": 1.6393064260482788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4824089002244938, "compression/movement_sparsity/importance_threshold": -5.352288430294366e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9267844063748699, "compression/movement_sparsity/model_sparsity": 0.8949465108935741, "compression_loss": 158.74392700195312, "distillation_loss": 6.518259048461914, "epoch": 1.77, "learning_rate": 3.732640985388238e-05, "loss": 164.707, "step": 2099, "task_loss": 3.243020534515381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4826043550912582, "compression/movement_sparsity/importance_threshold": -5.292819105728007e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9269328384136011, "compression/movement_sparsity/model_sparsity": 0.8950898438351446, "compression_loss": 158.7636260986328, "distillation_loss": 5.039811134338379, "epoch": 1.77, "learning_rate": 3.7320371935756555e-05, "loss": 164.335, "step": 2100, "task_loss": 1.7412668466567993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4827983567687062, "compression/movement_sparsity/importance_threshold": -5.23379193022944e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.927053904487608, "compression/movement_sparsity/model_sparsity": 0.8952067509170666, "compression_loss": 158.78305053710938, "distillation_loss": 8.121597290039062, "epoch": 1.78, "learning_rate": 3.731433401763072e-05, "loss": 165.423, "step": 2101, "task_loss": 4.506771087646484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.482990910679186, "compression/movement_sparsity/importance_threshold": -5.1752052539885164e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9271582290302542, "compression/movement_sparsity/model_sparsity": 0.895307491590733, "compression_loss": 158.80238342285156, "distillation_loss": 7.60074520111084, "epoch": 1.78, "learning_rate": 3.730829609950489e-05, "loss": 164.6981, "step": 2102, "task_loss": 4.195941925048828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.483182022245046, "compression/movement_sparsity/importance_threshold": -5.1170574271952596e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.927309391703374, "compression/movement_sparsity/model_sparsity": 0.8954534613610003, "compression_loss": 158.82159423828125, "distillation_loss": 7.900430679321289, "epoch": 1.78, "learning_rate": 3.7302258181379063e-05, "loss": 165.3778, "step": 2103, "task_loss": 4.078195095062256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4833716968886344, "compression/movement_sparsity/importance_threshold": -5.059346800039867e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9273589127715658, "compression/movement_sparsity/model_sparsity": 0.8955012812281551, "compression_loss": 158.84059143066406, "distillation_loss": 6.955070495605469, "epoch": 1.78, "learning_rate": 3.729622026325324e-05, "loss": 165.3256, "step": 2104, "task_loss": 2.5524044036865234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4835599400322992, "compression/movement_sparsity/importance_threshold": -5.002071722712276e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9274604351348175, "compression/movement_sparsity/model_sparsity": 0.8955993159859098, "compression_loss": 158.8595428466797, "distillation_loss": 8.337209701538086, "epoch": 1.78, "learning_rate": 3.72901823451274e-05, "loss": 166.1333, "step": 2105, "task_loss": 4.282992839813232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4837467570983887, "compression/movement_sparsity/importance_threshold": -4.9452305454025965e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9275472430752065, "compression/movement_sparsity/model_sparsity": 0.8956831418064941, "compression_loss": 158.87820434570312, "distillation_loss": 5.976417541503906, "epoch": 1.78, "learning_rate": 3.728414442700157e-05, "loss": 165.164, "step": 2106, "task_loss": 3.3806724548339844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4839321535092513, "compression/movement_sparsity/importance_threshold": -4.888821618300766e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9276258114157593, "compression/movement_sparsity/model_sparsity": 0.8957590110828443, "compression_loss": 158.8968048095703, "distillation_loss": 5.764797687530518, "epoch": 1.78, "learning_rate": 3.7278106508875746e-05, "loss": 165.0053, "step": 2107, "task_loss": 3.1879451274871826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4841161346872347, "compression/movement_sparsity/importance_threshold": -4.832843291596896e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.927695639341435, "compression/movement_sparsity/model_sparsity": 0.8958264402044571, "compression_loss": 158.9152069091797, "distillation_loss": 8.304316520690918, "epoch": 1.78, "learning_rate": 3.7272068590749906e-05, "loss": 165.3387, "step": 2108, "task_loss": 4.559038162231445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4842987060546875, "compression/movement_sparsity/importance_threshold": -4.7772939154810956e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9278333277051262, "compression/movement_sparsity/model_sparsity": 0.8959593985492768, "compression_loss": 158.9334716796875, "distillation_loss": 7.059016704559326, "epoch": 1.78, "learning_rate": 3.726603067262408e-05, "loss": 165.2538, "step": 2109, "task_loss": 4.099099159240723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4844798730339577, "compression/movement_sparsity/importance_threshold": -4.722171840143303e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9279292934062596, "compression/movement_sparsity/model_sparsity": 0.8960520675333512, "compression_loss": 158.95162963867188, "distillation_loss": 6.886305809020996, "epoch": 1.78, "learning_rate": 3.7259992754498254e-05, "loss": 165.6854, "step": 2110, "task_loss": 3.0369884967803955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4846596410473936, "compression/movement_sparsity/importance_threshold": -4.667475415773715e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.928077785065829, "compression/movement_sparsity/model_sparsity": 0.8961954580476006, "compression_loss": 158.96951293945312, "distillation_loss": 5.922722816467285, "epoch": 1.78, "learning_rate": 3.725395483637242e-05, "loss": 165.4843, "step": 2111, "task_loss": 2.9188270568847656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4848380155173435, "compression/movement_sparsity/importance_threshold": -4.613202992562095e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9281727729852163, "compression/movement_sparsity/model_sparsity": 0.89628718283974, "compression_loss": 158.98728942871094, "distillation_loss": 7.716068744659424, "epoch": 1.78, "learning_rate": 3.724791691824659e-05, "loss": 165.4163, "step": 2112, "task_loss": 3.7409980297088623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4850150018661554, "compression/movement_sparsity/importance_threshold": -4.559352920698815e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9282400014423473, "compression/movement_sparsity/model_sparsity": 0.8963521017925495, "compression_loss": 159.0049285888672, "distillation_loss": 7.8005828857421875, "epoch": 1.79, "learning_rate": 3.724187900012076e-05, "loss": 165.2228, "step": 2113, "task_loss": 2.960425615310669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4851906055161774, "compression/movement_sparsity/importance_threshold": -4.505923550373811e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9283603997629666, "compression/movement_sparsity/model_sparsity": 0.896468364060467, "compression_loss": 159.0223846435547, "distillation_loss": 8.260976791381836, "epoch": 1.79, "learning_rate": 3.723584108199493e-05, "loss": 165.511, "step": 2114, "task_loss": 3.4331555366516113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.485364831889758, "compression/movement_sparsity/importance_threshold": -4.4529132317770205e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9285026670070301, "compression/movement_sparsity/model_sparsity": 0.8966057439870316, "compression_loss": 159.0397186279297, "distillation_loss": 8.048629760742188, "epoch": 1.79, "learning_rate": 3.72298031638691e-05, "loss": 165.7658, "step": 2115, "task_loss": 3.9720163345336914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4855376864092453, "compression/movement_sparsity/importance_threshold": -4.400320315098554e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9285825947026933, "compression/movement_sparsity/model_sparsity": 0.8966829259204625, "compression_loss": 159.05697631835938, "distillation_loss": 7.3435893058776855, "epoch": 1.79, "learning_rate": 3.722376524574327e-05, "loss": 166.1063, "step": 2116, "task_loss": 3.122779607772827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4857091744969875, "compression/movement_sparsity/importance_threshold": -4.348143150528436e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9286929409499955, "compression/movement_sparsity/model_sparsity": 0.8967894814347052, "compression_loss": 159.07412719726562, "distillation_loss": 9.602479934692383, "epoch": 1.79, "learning_rate": 3.721772732761744e-05, "loss": 166.8232, "step": 2117, "task_loss": 3.6714892387390137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4858793015753324, "compression/movement_sparsity/importance_threshold": -4.2963800882568626e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9287387536020526, "compression/movement_sparsity/model_sparsity": 0.8968337202812278, "compression_loss": 159.0909881591797, "distillation_loss": 6.056572914123535, "epoch": 1.79, "learning_rate": 3.7211689409491605e-05, "loss": 165.3918, "step": 2118, "task_loss": 2.5096402168273926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4860480730666286, "compression/movement_sparsity/importance_threshold": -4.245029478473685e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9288878295458362, "compression/movement_sparsity/model_sparsity": 0.8969776750077312, "compression_loss": 159.10777282714844, "distillation_loss": 6.089041709899902, "epoch": 1.79, "learning_rate": 3.720565149136578e-05, "loss": 165.6644, "step": 2119, "task_loss": 3.466416597366333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4862154943932244, "compression/movement_sparsity/importance_threshold": -4.194089671369014e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9289536747995214, "compression/movement_sparsity/model_sparsity": 0.8970412582743886, "compression_loss": 159.12440490722656, "distillation_loss": 6.5863823890686035, "epoch": 1.79, "learning_rate": 3.719961357323995e-05, "loss": 165.5238, "step": 2120, "task_loss": 2.5310258865356445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4863815709774677, "compression/movement_sparsity/importance_threshold": -4.14355901713296e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9290470648804454, "compression/movement_sparsity/model_sparsity": 0.8971314401187315, "compression_loss": 159.14089965820312, "distillation_loss": 6.355241298675537, "epoch": 1.79, "learning_rate": 3.7193575655114113e-05, "loss": 166.3652, "step": 2121, "task_loss": 3.017137050628662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.486546308241707, "compression/movement_sparsity/importance_threshold": -4.0934358659554596e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.929107758893712, "compression/movement_sparsity/model_sparsity": 0.8971900491059257, "compression_loss": 159.1572723388672, "distillation_loss": 4.4748311042785645, "epoch": 1.79, "learning_rate": 3.718753773698829e-05, "loss": 165.1463, "step": 2122, "task_loss": 2.4547290802001953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4867097116082901, "compression/movement_sparsity/importance_threshold": -4.043718568026624e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9291622881123108, "compression/movement_sparsity/model_sparsity": 0.8972427050781141, "compression_loss": 159.17340087890625, "distillation_loss": 6.962255954742432, "epoch": 1.79, "learning_rate": 3.718149981886246e-05, "loss": 165.5693, "step": 2123, "task_loss": 3.743806838989258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4868717864995658, "compression/movement_sparsity/importance_threshold": -3.994405473536477e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9292816490303457, "compression/movement_sparsity/model_sparsity": 0.8973579655814174, "compression_loss": 159.18948364257812, "distillation_loss": 5.165178298950195, "epoch": 1.79, "learning_rate": 3.717546190073663e-05, "loss": 165.6816, "step": 2124, "task_loss": 3.4439103603363037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4870325383378815, "compression/movement_sparsity/importance_threshold": -3.945494932675129e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9293313370368844, "compression/movement_sparsity/model_sparsity": 0.8974059466520733, "compression_loss": 159.20530700683594, "distillation_loss": 5.668382167816162, "epoch": 1.8, "learning_rate": 3.7169423982610796e-05, "loss": 165.1665, "step": 2125, "task_loss": 2.804696798324585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.487191972545586, "compression/movement_sparsity/importance_threshold": -3.896985295632604e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9294567077354079, "compression/movement_sparsity/model_sparsity": 0.8975270104814171, "compression_loss": 159.2210235595703, "distillation_loss": 6.560277938842773, "epoch": 1.8, "learning_rate": 3.716338606448497e-05, "loss": 164.9624, "step": 2126, "task_loss": 3.6128621101379395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.487350094545027, "compression/movement_sparsity/importance_threshold": -3.848874912598839e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9295994996428474, "compression/movement_sparsity/model_sparsity": 0.8976648970475567, "compression_loss": 159.2365264892578, "distillation_loss": 5.549208641052246, "epoch": 1.8, "learning_rate": 3.715734814635914e-05, "loss": 164.923, "step": 2127, "task_loss": 3.7202606201171875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4875069097585532, "compression/movement_sparsity/importance_threshold": -3.8011621337640314e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9296634131813756, "compression/movement_sparsity/model_sparsity": 0.8977266149594154, "compression_loss": 159.251953125, "distillation_loss": 6.0247697830200195, "epoch": 1.8, "learning_rate": 3.7151310228233304e-05, "loss": 165.4937, "step": 2128, "task_loss": 3.0278818607330322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4876624236085125, "compression/movement_sparsity/importance_threshold": -3.7538453093181184e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9297507219368053, "compression/movement_sparsity/model_sparsity": 0.8978109243905031, "compression_loss": 159.26710510253906, "distillation_loss": 6.926674842834473, "epoch": 1.8, "learning_rate": 3.714527231010748e-05, "loss": 165.619, "step": 2129, "task_loss": 2.1637282371520996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4878166415172533, "compression/movement_sparsity/importance_threshold": -3.706922789451124e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9298541640910465, "compression/movement_sparsity/model_sparsity": 0.8979108129885207, "compression_loss": 159.28213500976562, "distillation_loss": 6.85511589050293, "epoch": 1.8, "learning_rate": 3.7139234391981645e-05, "loss": 165.4317, "step": 2130, "task_loss": 3.002366304397583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4879695689071237, "compression/movement_sparsity/importance_threshold": -3.660392924353245e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9299306933989334, "compression/movement_sparsity/model_sparsity": 0.8979847132792501, "compression_loss": 159.29702758789062, "distillation_loss": 4.378478527069092, "epoch": 1.8, "learning_rate": 3.713319647385581e-05, "loss": 165.1144, "step": 2131, "task_loss": 2.2784366607666016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4881212112004718, "compression/movement_sparsity/importance_threshold": -3.614254064214419e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9300515925345935, "compression/movement_sparsity/model_sparsity": 0.8981014591576709, "compression_loss": 159.31187438964844, "distillation_loss": 6.954366683959961, "epoch": 1.8, "learning_rate": 3.7127158555729986e-05, "loss": 165.4568, "step": 2132, "task_loss": 3.723555564880371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.488271573819646, "compression/movement_sparsity/importance_threshold": -3.56850455922467e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9302106490066883, "compression/movement_sparsity/model_sparsity": 0.8982550515506343, "compression_loss": 159.32643127441406, "distillation_loss": 7.012324333190918, "epoch": 1.8, "learning_rate": 3.7121120637604153e-05, "loss": 165.3813, "step": 2133, "task_loss": 3.294421434402466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4884206621869942, "compression/movement_sparsity/importance_threshold": -3.5231427595741084e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9303314765973424, "compression/movement_sparsity/model_sparsity": 0.8983717283418404, "compression_loss": 159.34104919433594, "distillation_loss": 7.716732025146484, "epoch": 1.8, "learning_rate": 3.711508271947833e-05, "loss": 165.2608, "step": 2134, "task_loss": 3.2470779418945312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4885684817248646, "compression/movement_sparsity/importance_threshold": -3.4781670154528446e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9304352884007803, "compression/movement_sparsity/model_sparsity": 0.8984719738904676, "compression_loss": 159.3553924560547, "distillation_loss": 6.853826999664307, "epoch": 1.8, "learning_rate": 3.7109044801352495e-05, "loss": 165.7967, "step": 2135, "task_loss": 3.518109083175659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4887150378556058, "compression/movement_sparsity/importance_threshold": -3.433575677050729e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.930519914218492, "compression/movement_sparsity/model_sparsity": 0.8985536925510015, "compression_loss": 159.36978149414062, "distillation_loss": 6.5251007080078125, "epoch": 1.81, "learning_rate": 3.710300688322667e-05, "loss": 166.5522, "step": 2136, "task_loss": 3.2383108139038086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4888603360015658, "compression/movement_sparsity/importance_threshold": -3.3893670945579595e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9305726905844482, "compression/movement_sparsity/model_sparsity": 0.8986046558864281, "compression_loss": 159.38400268554688, "distillation_loss": 5.866753578186035, "epoch": 1.81, "learning_rate": 3.7096968965100836e-05, "loss": 165.5232, "step": 2137, "task_loss": 3.39377498626709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4890043815850924, "compression/movement_sparsity/importance_threshold": -3.345539618164559e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.930643233960182, "compression/movement_sparsity/model_sparsity": 0.8986727758801887, "compression_loss": 159.3979034423828, "distillation_loss": 4.7557830810546875, "epoch": 1.81, "learning_rate": 3.7090931046975e-05, "loss": 165.4998, "step": 2138, "task_loss": 3.002495288848877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4891471800285343, "compression/movement_sparsity/importance_threshold": -3.302091598060466e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9306605001549187, "compression/movement_sparsity/model_sparsity": 0.8986894489280192, "compression_loss": 159.411865234375, "distillation_loss": 7.329342842102051, "epoch": 1.81, "learning_rate": 3.708489312884918e-05, "loss": 166.3008, "step": 2139, "task_loss": 3.2656755447387695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4892887367542396, "compression/movement_sparsity/importance_threshold": -3.259021384435876e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9307779770544673, "compression/movement_sparsity/model_sparsity": 0.898802890134667, "compression_loss": 159.42555236816406, "distillation_loss": 5.809759140014648, "epoch": 1.81, "learning_rate": 3.7078855210723344e-05, "loss": 165.9357, "step": 2140, "task_loss": 2.366703987121582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4894290571845565, "compression/movement_sparsity/importance_threshold": -3.216327327480728e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9309118496845151, "compression/movement_sparsity/model_sparsity": 0.8989321638280323, "compression_loss": 159.4390869140625, "distillation_loss": 8.127060890197754, "epoch": 1.81, "learning_rate": 3.707281729259751e-05, "loss": 166.01, "step": 2141, "task_loss": 3.5388832092285156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.489568146741833, "compression/movement_sparsity/importance_threshold": -3.174007777385218e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9310097828733085, "compression/movement_sparsity/model_sparsity": 0.8990267327105128, "compression_loss": 159.45266723632812, "distillation_loss": 7.221467971801758, "epoch": 1.81, "learning_rate": 3.7066779374471685e-05, "loss": 166.0727, "step": 2142, "task_loss": 3.764788866043091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4897060108484173, "compression/movement_sparsity/importance_threshold": -3.132061084339197e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.931155257718466, "compression/movement_sparsity/model_sparsity": 0.8991672100472062, "compression_loss": 159.46607971191406, "distillation_loss": 6.659930229187012, "epoch": 1.81, "learning_rate": 3.706074145634585e-05, "loss": 166.7952, "step": 2143, "task_loss": 2.9592795372009277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.489842654926658, "compression/movement_sparsity/importance_threshold": -3.0904855985328626e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9312677622401104, "compression/movement_sparsity/model_sparsity": 0.8992758496924278, "compression_loss": 159.4792938232422, "distillation_loss": 6.183413505554199, "epoch": 1.81, "learning_rate": 3.7054703538220026e-05, "loss": 165.463, "step": 2144, "task_loss": 3.170867919921875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4899780843989028, "compression/movement_sparsity/importance_threshold": -3.0492796701561513e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9314213693675956, "compression/movement_sparsity/model_sparsity": 0.899424179942533, "compression_loss": 159.49249267578125, "distillation_loss": 6.067525386810303, "epoch": 1.81, "learning_rate": 3.7048665620094194e-05, "loss": 166.0803, "step": 2145, "task_loss": 2.969949245452881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4901123046875, "compression/movement_sparsity/importance_threshold": -3.008441649399174e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9314494627065457, "compression/movement_sparsity/model_sparsity": 0.899451308188865, "compression_loss": 159.5055694580078, "distillation_loss": 5.507415771484375, "epoch": 1.81, "learning_rate": 3.704262770196836e-05, "loss": 165.7405, "step": 2146, "task_loss": 2.9966232776641846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.490245321214798, "compression/movement_sparsity/importance_threshold": -2.9679698864519544e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9314968255003954, "compression/movement_sparsity/model_sparsity": 0.8994970439250409, "compression_loss": 159.5183868408203, "distillation_loss": 5.309004306793213, "epoch": 1.81, "learning_rate": 3.7036589783842535e-05, "loss": 165.4328, "step": 2147, "task_loss": 2.4411094188690186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4903771394031449, "compression/movement_sparsity/importance_threshold": -2.9278627315046032e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.931572448571542, "compression/movement_sparsity/model_sparsity": 0.8995700691110499, "compression_loss": 159.5310821533203, "distillation_loss": 6.944520950317383, "epoch": 1.82, "learning_rate": 3.70305518657167e-05, "loss": 165.8476, "step": 2148, "task_loss": 3.641326665878296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.490507764674889, "compression/movement_sparsity/importance_threshold": -2.888118534746971e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9316739709347936, "compression/movement_sparsity/model_sparsity": 0.8996681038688046, "compression_loss": 159.543701171875, "distillation_loss": 6.9009246826171875, "epoch": 1.82, "learning_rate": 3.702451394759087e-05, "loss": 165.7909, "step": 2149, "task_loss": 3.2321109771728516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4906372024523782, "compression/movement_sparsity/importance_threshold": -2.8487356463693415e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9316792056443859, "compression/movement_sparsity/model_sparsity": 0.8996731587500184, "compression_loss": 159.55606079101562, "distillation_loss": 5.472177505493164, "epoch": 1.82, "learning_rate": 3.701847602946504e-05, "loss": 165.8256, "step": 2150, "task_loss": 2.0703155994415283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.490765458157961, "compression/movement_sparsity/importance_threshold": -2.809712416561652e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9317664309306422, "compression/movement_sparsity/model_sparsity": 0.8997573875793554, "compression_loss": 159.56846618652344, "distillation_loss": 6.139923572540283, "epoch": 1.82, "learning_rate": 3.701243811133921e-05, "loss": 165.6216, "step": 2151, "task_loss": 3.5726733207702637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4908925372139854, "compression/movement_sparsity/importance_threshold": -2.7710471955139267e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9318045763429093, "compression/movement_sparsity/model_sparsity": 0.8997942225793621, "compression_loss": 159.5805206298828, "distillation_loss": 5.594900608062744, "epoch": 1.82, "learning_rate": 3.7006400193213384e-05, "loss": 165.0827, "step": 2152, "task_loss": 2.7253451347351074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4910184450427997, "compression/movement_sparsity/importance_threshold": -2.7327383334162757e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9317699127875918, "compression/movement_sparsity/model_sparsity": 0.8997607498238075, "compression_loss": 159.59254455566406, "distillation_loss": 8.02879524230957, "epoch": 1.82, "learning_rate": 3.700036227508755e-05, "loss": 166.3777, "step": 2153, "task_loss": 3.7876791954040527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.491143187066752, "compression/movement_sparsity/importance_threshold": -2.694784180458723e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9317664190064745, "compression/movement_sparsity/model_sparsity": 0.8997573760648196, "compression_loss": 159.60446166992188, "distillation_loss": 5.754787921905518, "epoch": 1.82, "learning_rate": 3.6994324356961725e-05, "loss": 165.7267, "step": 2154, "task_loss": 2.83111310005188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4912667687081906, "compression/movement_sparsity/importance_threshold": -2.657183086831206e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9319087339472085, "compression/movement_sparsity/model_sparsity": 0.8998948020495275, "compression_loss": 159.6162567138672, "distillation_loss": 6.603320121765137, "epoch": 1.82, "learning_rate": 3.698828643883589e-05, "loss": 166.9662, "step": 2155, "task_loss": 2.6097564697265625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4913891953894636, "compression/movement_sparsity/importance_threshold": -2.6199334027239216e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9319926323906945, "compression/movement_sparsity/model_sparsity": 0.8999758183233779, "compression_loss": 159.62791442871094, "distillation_loss": 5.65250301361084, "epoch": 1.82, "learning_rate": 3.698224852071006e-05, "loss": 165.6558, "step": 2156, "task_loss": 3.757375478744507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4915104725329194, "compression/movement_sparsity/importance_threshold": -2.5830334783269807e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9320808473828646, "compression/movement_sparsity/model_sparsity": 0.9000610028591859, "compression_loss": 159.6395721435547, "distillation_loss": 5.8403778076171875, "epoch": 1.82, "learning_rate": 3.6976210602584234e-05, "loss": 165.4961, "step": 2157, "task_loss": 2.883249282836914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4916306055609059, "compression/movement_sparsity/importance_threshold": -2.546481663830147e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9322117628193387, "compression/movement_sparsity/model_sparsity": 0.9001874209476741, "compression_loss": 159.6510467529297, "distillation_loss": 6.101372241973877, "epoch": 1.82, "learning_rate": 3.69701726844584e-05, "loss": 166.0448, "step": 2158, "task_loss": 3.9884471893310547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4917495998957713, "compression/movement_sparsity/importance_threshold": -2.5102763094237042e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9322761056279019, "compression/movement_sparsity/model_sparsity": 0.9002495533828215, "compression_loss": 159.6622772216797, "distillation_loss": 5.891214370727539, "epoch": 1.82, "learning_rate": 3.696413476633257e-05, "loss": 166.0175, "step": 2159, "task_loss": 2.809208393096924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4918674609598641, "compression/movement_sparsity/importance_threshold": -2.47441576529759e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9324273398460275, "compression/movement_sparsity/model_sparsity": 0.9003955922403036, "compression_loss": 159.67349243164062, "distillation_loss": 7.285126209259033, "epoch": 1.83, "learning_rate": 3.695809684820674e-05, "loss": 165.8108, "step": 2160, "task_loss": 3.6934750080108643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4919841941755325, "compression/movement_sparsity/importance_threshold": -2.4388983816419144e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9324955937815752, "compression/movement_sparsity/model_sparsity": 0.9004615014431916, "compression_loss": 159.68466186523438, "distillation_loss": 5.652138710021973, "epoch": 1.83, "learning_rate": 3.695205893008091e-05, "loss": 165.9007, "step": 2161, "task_loss": 2.384229898452759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4920998049651244, "compression/movement_sparsity/importance_threshold": -2.4037225086467016e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9325608547510462, "compression/movement_sparsity/model_sparsity": 0.9005245204975951, "compression_loss": 159.695556640625, "distillation_loss": 7.637294769287109, "epoch": 1.83, "learning_rate": 3.6946021011955076e-05, "loss": 166.3575, "step": 2162, "task_loss": 3.6324594020843506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4922142987509879, "compression/movement_sparsity/importance_threshold": -2.3688864965019756e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9326347488178858, "compression/movement_sparsity/model_sparsity": 0.900595876075914, "compression_loss": 159.70648193359375, "distillation_loss": 5.64243221282959, "epoch": 1.83, "learning_rate": 3.693998309382925e-05, "loss": 165.5204, "step": 2163, "task_loss": 2.139094114303589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4923276809554715, "compression/movement_sparsity/importance_threshold": -2.3343886953978467e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9326593722240538, "compression/movement_sparsity/model_sparsity": 0.9006196535923296, "compression_loss": 159.71719360351562, "distillation_loss": 6.932564735412598, "epoch": 1.83, "learning_rate": 3.6933945175703424e-05, "loss": 166.0483, "step": 2164, "task_loss": 2.399298906326294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4924399570009235, "compression/movement_sparsity/importance_threshold": -2.3002274555241654e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9326743370544368, "compression/movement_sparsity/model_sparsity": 0.9006341043347518, "compression_loss": 159.72779846191406, "distillation_loss": 8.339947700500488, "epoch": 1.83, "learning_rate": 3.6927907257577585e-05, "loss": 166.4438, "step": 2165, "task_loss": 3.3859751224517822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4925511323096916, "compression/movement_sparsity/importance_threshold": -2.2664011270713026e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.932698626583911, "compression/movement_sparsity/model_sparsity": 0.9006575594441653, "compression_loss": 159.7381591796875, "distillation_loss": 4.974183082580566, "epoch": 1.83, "learning_rate": 3.692186933945176e-05, "loss": 166.1561, "step": 2166, "task_loss": 3.0369067192077637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4926612123041245, "compression/movement_sparsity/importance_threshold": -2.232908060229022e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9327527026841397, "compression/movement_sparsity/model_sparsity": 0.9007097778639935, "compression_loss": 159.74868774414062, "distillation_loss": 5.794914722442627, "epoch": 1.83, "learning_rate": 3.691583142132593e-05, "loss": 165.5491, "step": 2167, "task_loss": 2.7701401710510254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4927702024065703, "compression/movement_sparsity/importance_threshold": -2.199746605187434e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9328387117052972, "compression/movement_sparsity/model_sparsity": 0.9007928322106795, "compression_loss": 159.75888061523438, "distillation_loss": 8.15744400024414, "epoch": 1.83, "learning_rate": 3.690979350320009e-05, "loss": 166.7437, "step": 2168, "task_loss": 4.08432674407959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4928781080393767, "compression/movement_sparsity/importance_threshold": -2.1669151121367358e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9328935986489251, "compression/movement_sparsity/model_sparsity": 0.9008458336189418, "compression_loss": 159.76902770996094, "distillation_loss": 7.673264980316162, "epoch": 1.83, "learning_rate": 3.690375558507427e-05, "loss": 166.0066, "step": 2169, "task_loss": 4.269114971160889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4929849346248927, "compression/movement_sparsity/importance_threshold": -2.1344119312667782e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9330171687981355, "compression/movement_sparsity/model_sparsity": 0.9009651587533807, "compression_loss": 159.7790985107422, "distillation_loss": 5.9905266761779785, "epoch": 1.83, "learning_rate": 3.689771766694844e-05, "loss": 166.3865, "step": 2170, "task_loss": 3.4131176471710205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.493090687585466, "compression/movement_sparsity/importance_threshold": -2.1022354127677584e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9331158412853222, "compression/movement_sparsity/model_sparsity": 0.9010604415370804, "compression_loss": 159.7891387939453, "distillation_loss": 4.596959114074707, "epoch": 1.83, "learning_rate": 3.689167974882261e-05, "loss": 165.1221, "step": 2171, "task_loss": 2.5268735885620117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4931953723434446, "compression/movement_sparsity/importance_threshold": -2.0703839068296134e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9331393199713972, "compression/movement_sparsity/model_sparsity": 0.9010831136580599, "compression_loss": 159.79879760742188, "distillation_loss": 5.933595657348633, "epoch": 1.84, "learning_rate": 3.6885641830696775e-05, "loss": 165.1783, "step": 2172, "task_loss": 2.5600225925445557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4932989943211772, "compression/movement_sparsity/importance_threshold": -2.038855763642454e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9332004432546986, "compression/movement_sparsity/model_sparsity": 0.9011421371685427, "compression_loss": 159.80856323242188, "distillation_loss": 5.872112274169922, "epoch": 1.84, "learning_rate": 3.687960391257095e-05, "loss": 165.9194, "step": 2173, "task_loss": 2.7253353595733643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4934015589410117, "compression/movement_sparsity/importance_threshold": -2.007649333396304e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.933262126973879, "compression/movement_sparsity/model_sparsity": 0.9012017018622078, "compression_loss": 159.8181610107422, "distillation_loss": 5.918325424194336, "epoch": 1.84, "learning_rate": 3.6873565994445116e-05, "loss": 165.9345, "step": 2174, "task_loss": 3.452815055847168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4935030716252964, "compression/movement_sparsity/importance_threshold": -1.976762966281187e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9333678704924738, "compression/movement_sparsity/model_sparsity": 0.9013038127656339, "compression_loss": 159.82754516601562, "distillation_loss": 7.002851486206055, "epoch": 1.84, "learning_rate": 3.6867528076319284e-05, "loss": 166.2592, "step": 2175, "task_loss": 3.4002463817596436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4936035377963792, "compression/movement_sparsity/importance_threshold": -1.946195012487214e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9334123476377556, "compression/movement_sparsity/model_sparsity": 0.9013467619841475, "compression_loss": 159.83685302734375, "distillation_loss": 5.348321437835693, "epoch": 1.84, "learning_rate": 3.686149015819346e-05, "loss": 165.6941, "step": 2176, "task_loss": 3.303271770477295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4937029628766088, "compression/movement_sparsity/importance_threshold": -1.9159438222043218e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9335365378436832, "compression/movement_sparsity/model_sparsity": 0.9014666858744477, "compression_loss": 159.84608459472656, "distillation_loss": 7.034958839416504, "epoch": 1.84, "learning_rate": 3.685545224006763e-05, "loss": 166.2113, "step": 2177, "task_loss": 3.7367100715637207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.493801352288333, "compression/movement_sparsity/importance_threshold": -1.8860077456226212e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9335566300661495, "compression/movement_sparsity/model_sparsity": 0.9014860878672615, "compression_loss": 159.85511779785156, "distillation_loss": 6.940305709838867, "epoch": 1.84, "learning_rate": 3.684941432194179e-05, "loss": 167.359, "step": 2178, "task_loss": 3.855077028274536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4938987114539004, "compression/movement_sparsity/importance_threshold": -1.856385132932223e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9336910035112381, "compression/movement_sparsity/model_sparsity": 0.9016158451711301, "compression_loss": 159.86415100097656, "distillation_loss": 7.866343021392822, "epoch": 1.84, "learning_rate": 3.6843376403815966e-05, "loss": 166.2573, "step": 2179, "task_loss": 3.4499807357788086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4939950457956588, "compression/movement_sparsity/importance_threshold": -1.8270743343230637e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9336758001975024, "compression/movement_sparsity/model_sparsity": 0.9016011641379921, "compression_loss": 159.8729248046875, "distillation_loss": 5.8079681396484375, "epoch": 1.84, "learning_rate": 3.683733848569014e-05, "loss": 165.4043, "step": 2180, "task_loss": 3.4423396587371826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4940903607359564, "compression/movement_sparsity/importance_threshold": -1.7980736999852545e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9337295185727019, "compression/movement_sparsity/model_sparsity": 0.9016530371217465, "compression_loss": 159.88168334960938, "distillation_loss": 6.328896522521973, "epoch": 1.84, "learning_rate": 3.68313005675643e-05, "loss": 166.4665, "step": 2181, "task_loss": 4.046377182006836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4941846616971417, "compression/movement_sparsity/importance_threshold": -1.7693815801087323e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9337566699024088, "compression/movement_sparsity/model_sparsity": 0.9016792557197507, "compression_loss": 159.89035034179688, "distillation_loss": 8.63490104675293, "epoch": 1.84, "learning_rate": 3.6825262649438474e-05, "loss": 166.7287, "step": 2182, "task_loss": 3.969119071960449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4942779541015625, "compression/movement_sparsity/importance_threshold": -1.7409963248837812e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9337952803572137, "compression/movement_sparsity/model_sparsity": 0.9017165397866533, "compression_loss": 159.89894104003906, "distillation_loss": 5.827761650085449, "epoch": 1.84, "learning_rate": 3.681922473131265e-05, "loss": 166.188, "step": 2183, "task_loss": 3.3331120014190674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4943702433715673, "compression/movement_sparsity/importance_threshold": -1.7129162845002516e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9338439548095033, "compression/movement_sparsity/model_sparsity": 0.9017635421217667, "compression_loss": 159.90733337402344, "distillation_loss": 8.213016510009766, "epoch": 1.85, "learning_rate": 3.6813186813186815e-05, "loss": 165.9767, "step": 2184, "task_loss": 3.0984020233154297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4944615349295043, "compression/movement_sparsity/importance_threshold": -1.6851398091481673e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9339939608383625, "compression/movement_sparsity/model_sparsity": 0.901908394982062, "compression_loss": 159.91578674316406, "distillation_loss": 6.798361778259277, "epoch": 1.85, "learning_rate": 3.680714889506098e-05, "loss": 165.5089, "step": 2185, "task_loss": 2.7990546226501465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4945518341977215, "compression/movement_sparsity/importance_threshold": -1.6576652490177257e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9340489789478343, "compression/movement_sparsity/model_sparsity": 0.901961523050218, "compression_loss": 159.9240264892578, "distillation_loss": 7.7666497230529785, "epoch": 1.85, "learning_rate": 3.6801110976935156e-05, "loss": 166.8684, "step": 2186, "task_loss": 3.7062816619873047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4946411465985672, "compression/movement_sparsity/importance_threshold": -1.630490954298864e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9341234811472232, "compression/movement_sparsity/model_sparsity": 0.9020334658698623, "compression_loss": 159.9322509765625, "distillation_loss": 7.355094909667969, "epoch": 1.85, "learning_rate": 3.6795073058809324e-05, "loss": 166.999, "step": 2187, "task_loss": 2.447727918624878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4947294775543898, "compression/movement_sparsity/importance_threshold": -1.6036152751816926e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9342715554609253, "compression/movement_sparsity/model_sparsity": 0.9021764533753589, "compression_loss": 159.9402313232422, "distillation_loss": 8.737361907958984, "epoch": 1.85, "learning_rate": 3.678903514068349e-05, "loss": 166.4723, "step": 2188, "task_loss": 3.3702759742736816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.494816832487537, "compression/movement_sparsity/importance_threshold": -1.5770365618561488e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9343446983052037, "compression/movement_sparsity/model_sparsity": 0.9022470835379226, "compression_loss": 159.94825744628906, "distillation_loss": 9.457321166992188, "epoch": 1.85, "learning_rate": 3.6782997222557665e-05, "loss": 166.8252, "step": 2189, "task_loss": 3.891730546951294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4949032168203573, "compression/movement_sparsity/importance_threshold": -1.55075316451243e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9343520078199645, "compression/movement_sparsity/model_sparsity": 0.9022541419483647, "compression_loss": 159.95620727539062, "distillation_loss": 6.379671573638916, "epoch": 1.85, "learning_rate": 3.677695930443183e-05, "loss": 165.8596, "step": 2190, "task_loss": 3.244758367538452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.494988635975199, "compression/movement_sparsity/importance_threshold": -1.524763433340473e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.934434153410808, "compression/movement_sparsity/model_sparsity": 0.9023334655854532, "compression_loss": 159.96400451660156, "distillation_loss": 6.004927158355713, "epoch": 1.85, "learning_rate": 3.6770921386306e-05, "loss": 165.9915, "step": 2191, "task_loss": 2.917937755584717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4950730953744102, "compression/movement_sparsity/importance_threshold": -1.4990657185303888e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9345362481341063, "compression/movement_sparsity/model_sparsity": 0.9024320530409261, "compression_loss": 159.9718475341797, "distillation_loss": 8.180635452270508, "epoch": 1.85, "learning_rate": 3.676488346818017e-05, "loss": 166.3648, "step": 2192, "task_loss": 3.0410654544830322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.495156600440339, "compression/movement_sparsity/importance_threshold": -1.4736583702721143e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9345853160839278, "compression/movement_sparsity/model_sparsity": 0.9024794353557207, "compression_loss": 159.9793701171875, "distillation_loss": 5.616904258728027, "epoch": 1.85, "learning_rate": 3.675884555005435e-05, "loss": 166.4426, "step": 2193, "task_loss": 2.4470934867858887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4952391565953338, "compression/movement_sparsity/importance_threshold": -1.4485397387557601e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9345914212577574, "compression/movement_sparsity/model_sparsity": 0.9024853307980475, "compression_loss": 159.98690795898438, "distillation_loss": 7.2730207443237305, "epoch": 1.85, "learning_rate": 3.6752807631928514e-05, "loss": 166.5278, "step": 2194, "task_loss": 2.7994096279144287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4953207692617425, "compression/movement_sparsity/importance_threshold": -1.4237081741714369e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9347345470418906, "compression/movement_sparsity/model_sparsity": 0.9026235397711894, "compression_loss": 159.99436950683594, "distillation_loss": 6.690430164337158, "epoch": 1.85, "learning_rate": 3.674676971380268e-05, "loss": 166.0103, "step": 2195, "task_loss": 3.372398853302002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4954014438619134, "compression/movement_sparsity/importance_threshold": -1.3991620267091685e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9347729667100134, "compression/movement_sparsity/model_sparsity": 0.9026606396055193, "compression_loss": 160.00177001953125, "distillation_loss": 4.4670562744140625, "epoch": 1.86, "learning_rate": 3.6740731795676855e-05, "loss": 165.965, "step": 2196, "task_loss": 2.589193105697632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.495481185818195, "compression/movement_sparsity/importance_threshold": -1.3748996465588921e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9349178214984538, "compression/movement_sparsity/model_sparsity": 0.9028005181863514, "compression_loss": 160.009033203125, "distillation_loss": 5.434789657592773, "epoch": 1.86, "learning_rate": 3.673469387755102e-05, "loss": 166.0112, "step": 2197, "task_loss": 2.8077433109283447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4955600005529348, "compression/movement_sparsity/importance_threshold": -1.350919383910805e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9349239624447864, "compression/movement_sparsity/model_sparsity": 0.9028064481722856, "compression_loss": 160.0162811279297, "distillation_loss": 7.008647918701172, "epoch": 1.86, "learning_rate": 3.672865595942519e-05, "loss": 165.868, "step": 2198, "task_loss": 3.6527347564697266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4956378934884818, "compression/movement_sparsity/importance_threshold": -1.3272195889547575e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9349464156524446, "compression/movement_sparsity/model_sparsity": 0.9028281300431867, "compression_loss": 160.02330017089844, "distillation_loss": 7.055698871612549, "epoch": 1.86, "learning_rate": 3.6722618041299364e-05, "loss": 166.1398, "step": 2199, "task_loss": 3.82051420211792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4957148700471836, "compression/movement_sparsity/importance_threshold": -1.3037986118810338e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9349829632262486, "compression/movement_sparsity/model_sparsity": 0.902863422095397, "compression_loss": 160.03028869628906, "distillation_loss": 6.413025379180908, "epoch": 1.86, "learning_rate": 3.671658012317353e-05, "loss": 167.4812, "step": 2200, "task_loss": 3.40012788772583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.495790935651389, "compression/movement_sparsity/importance_threshold": -1.280654802879571e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9349892949592632, "compression/movement_sparsity/model_sparsity": 0.902869536313904, "compression_loss": 160.0372772216797, "distillation_loss": 6.814481735229492, "epoch": 1.86, "learning_rate": 3.67105422050477e-05, "loss": 166.0952, "step": 2201, "task_loss": 2.657379627227783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4958660957234455, "compression/movement_sparsity/importance_threshold": -1.2577865121403062e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9350578946956724, "compression/movement_sparsity/model_sparsity": 0.9029357794383299, "compression_loss": 160.0441131591797, "distillation_loss": 7.139746189117432, "epoch": 1.86, "learning_rate": 3.670450428692187e-05, "loss": 165.9225, "step": 2202, "task_loss": 3.4421777725219727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4959403556857018, "compression/movement_sparsity/importance_threshold": -1.2351920898535235e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9351021214334337, "compression/movement_sparsity/model_sparsity": 0.9029784868515919, "compression_loss": 160.0509796142578, "distillation_loss": 6.312686920166016, "epoch": 1.86, "learning_rate": 3.669846636879604e-05, "loss": 166.4353, "step": 2203, "task_loss": 2.844438076019287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4960137209605058, "compression/movement_sparsity/importance_threshold": -1.2128698862090732e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9351634235792498, "compression/movement_sparsity/model_sparsity": 0.9030376830801116, "compression_loss": 160.05775451660156, "distillation_loss": 6.673650741577148, "epoch": 1.86, "learning_rate": 3.669242845067021e-05, "loss": 166.7881, "step": 2204, "task_loss": 4.141193389892578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4960861969702057, "compression/movement_sparsity/importance_threshold": -1.1908182513969794e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9352639562365876, "compression/movement_sparsity/model_sparsity": 0.9031347621313954, "compression_loss": 160.06434631347656, "distillation_loss": 7.133143424987793, "epoch": 1.86, "learning_rate": 3.668639053254438e-05, "loss": 165.8784, "step": 2205, "task_loss": 3.424313545227051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.49615778913715, "compression/movement_sparsity/importance_threshold": -1.1690355356074392e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9353431804063603, "compression/movement_sparsity/model_sparsity": 0.9032112647072144, "compression_loss": 160.07080078125, "distillation_loss": 6.824629783630371, "epoch": 1.86, "learning_rate": 3.668035261441855e-05, "loss": 166.4237, "step": 2206, "task_loss": 3.374077320098877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4962285028836866, "compression/movement_sparsity/importance_threshold": -1.1475200890304765e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9353846526613979, "compression/movement_sparsity/model_sparsity": 0.9032513122627077, "compression_loss": 160.07717895507812, "distillation_loss": 6.282980442047119, "epoch": 1.87, "learning_rate": 3.667431469629272e-05, "loss": 166.5712, "step": 2207, "task_loss": 3.4375152587890625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.496298343632164, "compression/movement_sparsity/importance_threshold": -1.1262702618559418e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9354241455046078, "compression/movement_sparsity/model_sparsity": 0.9032894484052593, "compression_loss": 160.08355712890625, "distillation_loss": 7.188577651977539, "epoch": 1.87, "learning_rate": 3.666827677816689e-05, "loss": 167.0093, "step": 2208, "task_loss": 2.6003332138061523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4963673168049298, "compression/movement_sparsity/importance_threshold": -1.1052844042742059e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9354818227034626, "compression/movement_sparsity/model_sparsity": 0.9033451442148974, "compression_loss": 160.0897216796875, "distillation_loss": 6.781490325927734, "epoch": 1.87, "learning_rate": 3.666223886004106e-05, "loss": 166.8658, "step": 2209, "task_loss": 4.086533069610596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4964354278243328, "compression/movement_sparsity/importance_threshold": -1.0845608664750324e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9355251432044837, "compression/movement_sparsity/model_sparsity": 0.903386976523439, "compression_loss": 160.0957489013672, "distillation_loss": 7.534673690795898, "epoch": 1.87, "learning_rate": 3.665620094191523e-05, "loss": 167.4478, "step": 2210, "task_loss": 3.690523624420166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4965026821127212, "compression/movement_sparsity/importance_threshold": -1.0640979986486186e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9356038069383774, "compression/movement_sparsity/model_sparsity": 0.9034629379160756, "compression_loss": 160.1017303466797, "distillation_loss": 6.992488384246826, "epoch": 1.87, "learning_rate": 3.66501630237894e-05, "loss": 166.7302, "step": 2211, "task_loss": 2.533961534500122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4965690850924427, "compression/movement_sparsity/importance_threshold": -1.0438941509849885e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9356397344554643, "compression/movement_sparsity/model_sparsity": 0.9034976312124245, "compression_loss": 160.10768127441406, "distillation_loss": 5.548211574554443, "epoch": 1.87, "learning_rate": 3.664412510566357e-05, "loss": 166.831, "step": 2212, "task_loss": 2.105015277862549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4966346421858459, "compression/movement_sparsity/importance_threshold": -1.023947673674079e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9357597392785516, "compression/movement_sparsity/model_sparsity": 0.9036135135006608, "compression_loss": 160.11325073242188, "distillation_loss": 5.13206672668457, "epoch": 1.87, "learning_rate": 3.663808718753774e-05, "loss": 165.3058, "step": 2213, "task_loss": 2.5881645679473877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4966993588152788, "compression/movement_sparsity/importance_threshold": -1.0042569169060878e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9357917318203186, "compression/movement_sparsity/model_sparsity": 0.9036444070001975, "compression_loss": 160.1190643310547, "distillation_loss": 7.204558372497559, "epoch": 1.87, "learning_rate": 3.663204926941191e-05, "loss": 166.0462, "step": 2214, "task_loss": 3.9671390056610107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4967632404030895, "compression/movement_sparsity/importance_threshold": -9.848202308709517e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9359343448652435, "compression/movement_sparsity/model_sparsity": 0.9037821208483002, "compression_loss": 160.1246795654297, "distillation_loss": 7.157886505126953, "epoch": 1.87, "learning_rate": 3.662601135128608e-05, "loss": 166.1892, "step": 2215, "task_loss": 2.864903211593628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4968262923716265, "compression/movement_sparsity/importance_threshold": -9.656359657587814e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9358821289351661, "compression/movement_sparsity/model_sparsity": 0.903731698696056, "compression_loss": 160.13018798828125, "distillation_loss": 8.18702220916748, "epoch": 1.87, "learning_rate": 3.6619973433160246e-05, "loss": 166.8093, "step": 2216, "task_loss": 3.9553165435791016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.496888520143238, "compression/movement_sparsity/importance_threshold": -9.467024717596008e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9359327708751156, "compression/movement_sparsity/model_sparsity": 0.9037806009295754, "compression_loss": 160.13563537597656, "distillation_loss": 6.0620527267456055, "epoch": 1.87, "learning_rate": 3.661393551503442e-05, "loss": 165.3558, "step": 2217, "task_loss": 2.769505500793457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4969499291402717, "compression/movement_sparsity/importance_threshold": -9.280180990635205e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9360137002008602, "compression/movement_sparsity/model_sparsity": 0.903858750084013, "compression_loss": 160.1409912109375, "distillation_loss": 5.198545932769775, "epoch": 1.87, "learning_rate": 3.660789759690859e-05, "loss": 165.9041, "step": 2218, "task_loss": 2.987011671066284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4970105247850765, "compression/movement_sparsity/importance_threshold": -9.095811978603908e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9361177385634831, "compression/movement_sparsity/model_sparsity": 0.9039592144088203, "compression_loss": 160.14625549316406, "distillation_loss": 5.245521545410156, "epoch": 1.88, "learning_rate": 3.6601859678782755e-05, "loss": 166.9062, "step": 2219, "task_loss": 2.909696578979492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4970703125, "compression/movement_sparsity/importance_threshold": -8.91390118340496e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9361651252056681, "compression/movement_sparsity/model_sparsity": 0.9040049731740678, "compression_loss": 160.15145874023438, "distillation_loss": 5.957149505615234, "epoch": 1.88, "learning_rate": 3.659582176065693e-05, "loss": 166.0536, "step": 2220, "task_loss": 2.8713061809539795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4971292977073907, "compression/movement_sparsity/importance_threshold": -8.73443210693773e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9361943274922083, "compression/movement_sparsity/model_sparsity": 0.9040331722722287, "compression_loss": 160.15650939941406, "distillation_loss": 6.874434471130371, "epoch": 1.88, "learning_rate": 3.6589783842531096e-05, "loss": 166.494, "step": 2221, "task_loss": 2.6459550857543945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4971874858295968, "compression/movement_sparsity/importance_threshold": -8.557388251102457e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9362388642583283, "compression/movement_sparsity/model_sparsity": 0.9040761790634212, "compression_loss": 160.1615753173828, "distillation_loss": 6.250699996948242, "epoch": 1.88, "learning_rate": 3.658374592440526e-05, "loss": 166.2379, "step": 2222, "task_loss": 3.9180636405944824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4972448822889661, "compression/movement_sparsity/importance_threshold": -8.382753117800248e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9363022292851452, "compression/movement_sparsity/model_sparsity": 0.9041373673066334, "compression_loss": 160.16648864746094, "distillation_loss": 5.316680908203125, "epoch": 1.88, "learning_rate": 3.657770800627944e-05, "loss": 166.1566, "step": 2223, "task_loss": 3.8898046016693115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4973014925078474, "compression/movement_sparsity/importance_threshold": -8.210510208930474e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9364030600466741, "compression/movement_sparsity/model_sparsity": 0.9042347342213121, "compression_loss": 160.17127990722656, "distillation_loss": 6.536879539489746, "epoch": 1.88, "learning_rate": 3.657167008815361e-05, "loss": 166.464, "step": 2224, "task_loss": 4.587782859802246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4973573219085887, "compression/movement_sparsity/importance_threshold": -8.040643026394241e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9364950788483201, "compression/movement_sparsity/model_sparsity": 0.9043235918940385, "compression_loss": 160.176025390625, "distillation_loss": 7.860706329345703, "epoch": 1.88, "learning_rate": 3.656563217002778e-05, "loss": 167.0165, "step": 2225, "task_loss": 3.4239118099212646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4974123759135378, "compression/movement_sparsity/importance_threshold": -7.873135072092655e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9365924396770668, "compression/movement_sparsity/model_sparsity": 0.904417608078801, "compression_loss": 160.18069458007812, "distillation_loss": 5.045379638671875, "epoch": 1.88, "learning_rate": 3.6559594251901945e-05, "loss": 166.1685, "step": 2226, "task_loss": 2.22676682472229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4974666599450435, "compression/movement_sparsity/importance_threshold": -7.707969847925086e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.936577272135834, "compression/movement_sparsity/model_sparsity": 0.9044029615892704, "compression_loss": 160.1852569580078, "distillation_loss": 5.71860408782959, "epoch": 1.88, "learning_rate": 3.655355633377612e-05, "loss": 165.9178, "step": 2227, "task_loss": 2.1781108379364014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4975201794254536, "compression/movement_sparsity/importance_threshold": -7.5451308557935096e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9367110612967084, "compression/movement_sparsity/model_sparsity": 0.9045321546808851, "compression_loss": 160.18972778320312, "distillation_loss": 6.056445121765137, "epoch": 1.88, "learning_rate": 3.6547518415650287e-05, "loss": 166.8065, "step": 2228, "task_loss": 3.6674699783325195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4975729397771163, "compression/movement_sparsity/importance_threshold": -7.384601597596428e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9367594137964719, "compression/movement_sparsity/model_sparsity": 0.9045788461235319, "compression_loss": 160.19427490234375, "distillation_loss": 4.525057315826416, "epoch": 1.88, "learning_rate": 3.6541480497524454e-05, "loss": 166.3232, "step": 2229, "task_loss": 2.363933801651001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4976249464223799, "compression/movement_sparsity/importance_threshold": -7.226365575235816e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9367685715572162, "compression/movement_sparsity/model_sparsity": 0.9045876892870222, "compression_loss": 160.1986083984375, "distillation_loss": 8.245214462280273, "epoch": 1.88, "learning_rate": 3.653544257939863e-05, "loss": 166.9164, "step": 2230, "task_loss": 4.12413215637207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4976762047835925, "compression/movement_sparsity/importance_threshold": -7.0704062906110435e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9368000871322778, "compression/movement_sparsity/model_sparsity": 0.904618122205127, "compression_loss": 160.20294189453125, "distillation_loss": 7.933854579925537, "epoch": 1.89, "learning_rate": 3.6529404661272795e-05, "loss": 166.4808, "step": 2231, "task_loss": 3.388575315475464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4977267202831026, "compression/movement_sparsity/importance_threshold": -6.916707245623217e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9368373859286427, "compression/movement_sparsity/model_sparsity": 0.9046541396730924, "compression_loss": 160.20712280273438, "distillation_loss": 6.536538124084473, "epoch": 1.89, "learning_rate": 3.652336674314696e-05, "loss": 166.948, "step": 2232, "task_loss": 2.6000890731811523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4977764983432582, "compression/movement_sparsity/importance_threshold": -6.765251942172576e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9368213717715078, "compression/movement_sparsity/model_sparsity": 0.9046386756515203, "compression_loss": 160.21136474609375, "distillation_loss": 5.801495552062988, "epoch": 1.89, "learning_rate": 3.6517328825021136e-05, "loss": 166.8858, "step": 2233, "task_loss": 2.9945945739746094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4978255443864072, "compression/movement_sparsity/importance_threshold": -6.6160238821602255e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.936865276556743, "compression/movement_sparsity/model_sparsity": 0.9046810721723159, "compression_loss": 160.21539306640625, "distillation_loss": 5.843520164489746, "epoch": 1.89, "learning_rate": 3.651129090689531e-05, "loss": 166.1308, "step": 2234, "task_loss": 2.2337327003479004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4978738638348983, "compression/movement_sparsity/importance_threshold": -6.469006567485537e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9369365711550378, "compression/movement_sparsity/model_sparsity": 0.9047499175818314, "compression_loss": 160.2193145751953, "distillation_loss": 5.953176498413086, "epoch": 1.89, "learning_rate": 3.650525298876947e-05, "loss": 166.6612, "step": 2235, "task_loss": 2.551196813583374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4979214621110792, "compression/movement_sparsity/importance_threshold": -6.324183500049617e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9369228702864242, "compression/movement_sparsity/model_sparsity": 0.9047366873802035, "compression_loss": 160.2232666015625, "distillation_loss": 7.417908668518066, "epoch": 1.89, "learning_rate": 3.6499215070643644e-05, "loss": 166.7264, "step": 2236, "task_loss": 3.511472463607788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4979683446372987, "compression/movement_sparsity/importance_threshold": -6.181538181753571e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.936976517116618, "compression/movement_sparsity/model_sparsity": 0.9047884912767431, "compression_loss": 160.22714233398438, "distillation_loss": 5.96925163269043, "epoch": 1.89, "learning_rate": 3.649317715251782e-05, "loss": 166.5463, "step": 2237, "task_loss": 3.3122780323028564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4980145168359045, "compression/movement_sparsity/importance_threshold": -6.041054114495903e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9370168208032272, "compression/movement_sparsity/model_sparsity": 0.9048274104077286, "compression_loss": 160.23094177246094, "distillation_loss": 6.831334114074707, "epoch": 1.89, "learning_rate": 3.648713923439198e-05, "loss": 167.4659, "step": 2238, "task_loss": 3.7148828506469727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4980599841292448, "compression/movement_sparsity/importance_threshold": -5.902714800179454e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9371547357261036, "compression/movement_sparsity/model_sparsity": 0.9049605875287283, "compression_loss": 160.2347412109375, "distillation_loss": 6.468693256378174, "epoch": 1.89, "learning_rate": 3.648110131626615e-05, "loss": 166.6892, "step": 2239, "task_loss": 3.4829370975494385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.498104751939668, "compression/movement_sparsity/importance_threshold": -5.7665037407027275e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9372125560149699, "compression/movement_sparsity/model_sparsity": 0.905016421512796, "compression_loss": 160.23838806152344, "distillation_loss": 6.69094181060791, "epoch": 1.89, "learning_rate": 3.647506339814033e-05, "loss": 166.6392, "step": 2240, "task_loss": 3.3223018646240234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4981488256895221, "compression/movement_sparsity/importance_threshold": -5.63240443796683e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.937311610075521, "compression/movement_sparsity/model_sparsity": 0.9051120727616413, "compression_loss": 160.24195861816406, "distillation_loss": 8.05595588684082, "epoch": 1.89, "learning_rate": 3.646902548001449e-05, "loss": 167.0704, "step": 2241, "task_loss": 3.416008472442627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4981922108011556, "compression/movement_sparsity/importance_threshold": -5.500400393873735e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9373872331466676, "compression/movement_sparsity/model_sparsity": 0.9051850979476503, "compression_loss": 160.245361328125, "distillation_loss": 6.689817428588867, "epoch": 1.89, "learning_rate": 3.646298756188866e-05, "loss": 166.2641, "step": 2242, "task_loss": 3.636019229888916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4982349126969166, "compression/movement_sparsity/importance_threshold": -5.370475110321078e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9374037123463405, "compression/movement_sparsity/model_sparsity": 0.9052010110361184, "compression_loss": 160.24884033203125, "distillation_loss": 7.328866004943848, "epoch": 1.9, "learning_rate": 3.6456949643762835e-05, "loss": 166.4979, "step": 2243, "task_loss": 3.576991081237793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.498276936799153, "compression/movement_sparsity/importance_threshold": -5.242612089210834e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9374651456580003, "compression/movement_sparsity/model_sparsity": 0.9052603339245319, "compression_loss": 160.2522735595703, "distillation_loss": 6.623807907104492, "epoch": 1.9, "learning_rate": 3.6450911725637e-05, "loss": 167.2635, "step": 2244, "task_loss": 2.831836462020874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4983182885302133, "compression/movement_sparsity/importance_threshold": -5.116794832444108e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9375227155393463, "compression/movement_sparsity/model_sparsity": 0.9053159261033479, "compression_loss": 160.25572204589844, "distillation_loss": 6.56038761138916, "epoch": 1.9, "learning_rate": 3.644487380751117e-05, "loss": 166.1317, "step": 2245, "task_loss": 2.967102527618408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4983589733124458, "compression/movement_sparsity/importance_threshold": -4.993006841920271e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9375944990285142, "compression/movement_sparsity/model_sparsity": 0.905385243608831, "compression_loss": 160.2588348388672, "distillation_loss": 7.125331878662109, "epoch": 1.9, "learning_rate": 3.643883588938534e-05, "loss": 167.2203, "step": 2246, "task_loss": 4.350142002105713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.498398996568198, "compression/movement_sparsity/importance_threshold": -4.8712316195404295e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9376362932360779, "compression/movement_sparsity/model_sparsity": 0.9054256020567909, "compression_loss": 160.26210021972656, "distillation_loss": 4.466840744018555, "epoch": 1.9, "learning_rate": 3.643279797125951e-05, "loss": 165.5341, "step": 2247, "task_loss": 2.2815232276916504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.498438363719819, "compression/movement_sparsity/importance_threshold": -4.751452667204822e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9377269288342781, "compression/movement_sparsity/model_sparsity": 0.9055131240433651, "compression_loss": 160.26519775390625, "distillation_loss": 5.584445953369141, "epoch": 1.9, "learning_rate": 3.642676005313368e-05, "loss": 166.7658, "step": 2248, "task_loss": 3.8877828121185303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4984770801896568, "compression/movement_sparsity/importance_threshold": -4.633653486813688e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9377823523654496, "compression/movement_sparsity/model_sparsity": 0.9055666436057382, "compression_loss": 160.26832580566406, "distillation_loss": 8.422534942626953, "epoch": 1.9, "learning_rate": 3.642072213500785e-05, "loss": 167.4358, "step": 2249, "task_loss": 3.821789026260376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.498515151400059, "compression/movement_sparsity/importance_threshold": -4.5178175802681325e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9378435948904273, "compression/movement_sparsity/model_sparsity": 0.905625782261579, "compression_loss": 160.2713165283203, "distillation_loss": 7.25454568862915, "epoch": 1.9, "learning_rate": 3.6414684216882026e-05, "loss": 166.5585, "step": 2250, "task_loss": 4.325481414794922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4985525827733743, "compression/movement_sparsity/importance_threshold": -4.403928449467527e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9378312414527566, "compression/movement_sparsity/model_sparsity": 0.9056138532024958, "compression_loss": 160.27426147460938, "distillation_loss": 6.13362979888916, "epoch": 1.9, "learning_rate": 3.6408646298756186e-05, "loss": 166.109, "step": 2251, "task_loss": 2.4424173831939697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4985893797319507, "compression/movement_sparsity/importance_threshold": -4.291969596313845e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9378494138842337, "compression/movement_sparsity/model_sparsity": 0.9056314013550467, "compression_loss": 160.2769775390625, "distillation_loss": 6.943127155303955, "epoch": 1.9, "learning_rate": 3.640260838063036e-05, "loss": 166.8966, "step": 2252, "task_loss": 2.503864288330078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4986255476981365, "compression/movement_sparsity/importance_threshold": -4.181924522705591e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9378513336752231, "compression/movement_sparsity/model_sparsity": 0.9056332551953096, "compression_loss": 160.27963256835938, "distillation_loss": 6.693708419799805, "epoch": 1.9, "learning_rate": 3.6396570462504534e-05, "loss": 166.0906, "step": 2253, "task_loss": 3.5935592651367188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.49866109209428, "compression/movement_sparsity/importance_threshold": -4.073776730544737e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9379069122205739, "compression/movement_sparsity/model_sparsity": 0.905686924446648, "compression_loss": 160.28225708007812, "distillation_loss": 6.474967956542969, "epoch": 1.9, "learning_rate": 3.63905325443787e-05, "loss": 165.6588, "step": 2254, "task_loss": 2.2397470474243164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4986960183427291, "compression/movement_sparsity/importance_threshold": -3.967509721731523e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9379576495538644, "compression/movement_sparsity/model_sparsity": 0.9057359187964538, "compression_loss": 160.28485107421875, "distillation_loss": 6.957125663757324, "epoch": 1.91, "learning_rate": 3.638449462625287e-05, "loss": 166.6282, "step": 2255, "task_loss": 3.5468225479125977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4987303318658323, "compression/movement_sparsity/importance_threshold": -3.863106998166187e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9380159587336038, "compression/movement_sparsity/model_sparsity": 0.905792224876489, "compression_loss": 160.2872314453125, "distillation_loss": 6.309211730957031, "epoch": 1.91, "learning_rate": 3.637845670812704e-05, "loss": 165.927, "step": 2256, "task_loss": 3.3922078609466553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4987640380859375, "compression/movement_sparsity/importance_threshold": -3.7605520617489674e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9379826545333968, "compression/movement_sparsity/model_sparsity": 0.9057600647780149, "compression_loss": 160.2897491455078, "distillation_loss": 6.718111991882324, "epoch": 1.91, "learning_rate": 3.637241879000121e-05, "loss": 167.1852, "step": 2257, "task_loss": 2.5863261222839355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4987971424253932, "compression/movement_sparsity/importance_threshold": -3.6598284143801035e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9380228747508326, "compression/movement_sparsity/model_sparsity": 0.9057989033072499, "compression_loss": 160.29217529296875, "distillation_loss": 6.152707576751709, "epoch": 1.91, "learning_rate": 3.6366380871875377e-05, "loss": 166.3755, "step": 2258, "task_loss": 2.6125926971435547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4988296503065472, "compression/movement_sparsity/importance_threshold": -3.5609195579615685e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9380781671161601, "compression/movement_sparsity/model_sparsity": 0.9058522962097292, "compression_loss": 160.29444885253906, "distillation_loss": 5.998579978942871, "epoch": 1.91, "learning_rate": 3.636034295374955e-05, "loss": 166.0301, "step": 2259, "task_loss": 2.7233455181121826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4988615671517482, "compression/movement_sparsity/importance_threshold": -3.4638089943918662e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9381247548391134, "compression/movement_sparsity/model_sparsity": 0.9058972835010785, "compression_loss": 160.2967071533203, "distillation_loss": 5.3853936195373535, "epoch": 1.91, "learning_rate": 3.635430503562372e-05, "loss": 166.3539, "step": 2260, "task_loss": 3.242863893508911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.498892898383344, "compression/movement_sparsity/importance_threshold": -3.36848022557297e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9381330302114527, "compression/movement_sparsity/model_sparsity": 0.9059052745889199, "compression_loss": 160.29898071289062, "distillation_loss": 5.459070205688477, "epoch": 1.91, "learning_rate": 3.6348267117497885e-05, "loss": 165.9188, "step": 2261, "task_loss": 3.122023582458496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.498923649423683, "compression/movement_sparsity/importance_threshold": -3.2749167534051188e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9381983984984325, "compression/movement_sparsity/model_sparsity": 0.9059683972741455, "compression_loss": 160.30105590820312, "distillation_loss": 6.814894676208496, "epoch": 1.91, "learning_rate": 3.634222919937206e-05, "loss": 166.7034, "step": 2262, "task_loss": 3.489725351333618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4989538256951132, "compression/movement_sparsity/importance_threshold": -3.1831020797876836e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9382597125684161, "compression/movement_sparsity/model_sparsity": 0.906027605017201, "compression_loss": 160.30328369140625, "distillation_loss": 6.812126159667969, "epoch": 1.91, "learning_rate": 3.6336191281246226e-05, "loss": 166.5372, "step": 2263, "task_loss": 3.029024362564087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4989834326199831, "compression/movement_sparsity/importance_threshold": -3.0930197066217704e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9382236896579882, "compression/movement_sparsity/model_sparsity": 0.9059928196045658, "compression_loss": 160.3053741455078, "distillation_loss": 8.494635581970215, "epoch": 1.91, "learning_rate": 3.63301533631204e-05, "loss": 166.7912, "step": 2264, "task_loss": 4.287404537200928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4990124756206404, "compression/movement_sparsity/importance_threshold": -3.0046531358084855e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9382585559241554, "compression/movement_sparsity/model_sparsity": 0.906026488107229, "compression_loss": 160.30738830566406, "distillation_loss": 5.719357490539551, "epoch": 1.91, "learning_rate": 3.632411544499457e-05, "loss": 166.7411, "step": 2265, "task_loss": 3.0443618297576904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499040960119434, "compression/movement_sparsity/importance_threshold": -2.9179858692472e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9382844671404281, "compression/movement_sparsity/model_sparsity": 0.9060515091935105, "compression_loss": 160.309326171875, "distillation_loss": 5.495291709899902, "epoch": 1.91, "learning_rate": 3.631807752686874e-05, "loss": 165.9636, "step": 2266, "task_loss": 3.589195966720581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4990688915387116, "compression/movement_sparsity/importance_threshold": -2.83300140883902e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9383269887222176, "compression/movement_sparsity/model_sparsity": 0.9060925700281538, "compression_loss": 160.31121826171875, "distillation_loss": 4.694790840148926, "epoch": 1.92, "learning_rate": 3.631203960874291e-05, "loss": 165.6907, "step": 2267, "task_loss": 1.6176011562347412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4990962753008215, "compression/movement_sparsity/importance_threshold": -2.749683256484184e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9383026753444081, "compression/movement_sparsity/model_sparsity": 0.9060690918896688, "compression_loss": 160.3130340576172, "distillation_loss": 6.856955528259277, "epoch": 1.92, "learning_rate": 3.6306001690617076e-05, "loss": 167.0673, "step": 2268, "task_loss": 2.5767476558685303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4991231168281116, "compression/movement_sparsity/importance_threshold": -2.668014914083798e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9383620219267318, "compression/movement_sparsity/model_sparsity": 0.9061263997343182, "compression_loss": 160.31483459472656, "distillation_loss": 6.369436264038086, "epoch": 1.92, "learning_rate": 3.629996377249125e-05, "loss": 166.8442, "step": 2269, "task_loss": 2.9285056591033936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4991494215429304, "compression/movement_sparsity/importance_threshold": -2.5879798835372336e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9384354390268658, "compression/movement_sparsity/model_sparsity": 0.9061972947312052, "compression_loss": 160.31654357910156, "distillation_loss": 5.047272682189941, "epoch": 1.92, "learning_rate": 3.629392585436542e-05, "loss": 165.8542, "step": 2270, "task_loss": 2.8153228759765625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4991751948676264, "compression/movement_sparsity/importance_threshold": -2.5095616667455967e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9384625307357345, "compression/movement_sparsity/model_sparsity": 0.9062234557565304, "compression_loss": 160.31805419921875, "distillation_loss": 6.451539993286133, "epoch": 1.92, "learning_rate": 3.6287887936239584e-05, "loss": 165.6697, "step": 2271, "task_loss": 2.927915334701538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4992004422245473, "compression/movement_sparsity/importance_threshold": -2.432743765609126e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9385876271784024, "compression/movement_sparsity/model_sparsity": 0.9063442547515509, "compression_loss": 160.31951904296875, "distillation_loss": 7.025057792663574, "epoch": 1.92, "learning_rate": 3.628185001811376e-05, "loss": 166.8978, "step": 2272, "task_loss": 2.3560400009155273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4992251690360416, "compression/movement_sparsity/importance_threshold": -2.3575096820280597e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9386287536325785, "compression/movement_sparsity/model_sparsity": 0.9063839683855063, "compression_loss": 160.3209686279297, "distillation_loss": 7.400439262390137, "epoch": 1.92, "learning_rate": 3.6275812099987925e-05, "loss": 166.7368, "step": 2273, "task_loss": 3.0399365425109863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4992493807244573, "compression/movement_sparsity/importance_threshold": -2.2838429179035044e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.938641047449411, "compression/movement_sparsity/model_sparsity": 0.9063958398719104, "compression_loss": 160.3223419189453, "distillation_loss": 8.527482032775879, "epoch": 1.92, "learning_rate": 3.62697741818621e-05, "loss": 167.3861, "step": 2274, "task_loss": 4.076850891113281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4992730827121428, "compression/movement_sparsity/importance_threshold": -2.2117269751356985e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9386525661953472, "compression/movement_sparsity/model_sparsity": 0.906406962913488, "compression_loss": 160.3237762451172, "distillation_loss": 6.8045549392700195, "epoch": 1.92, "learning_rate": 3.6263736263736266e-05, "loss": 166.9141, "step": 2275, "task_loss": 3.4388017654418945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4992962804214458, "compression/movement_sparsity/importance_threshold": -2.1411453556248808e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9387365600321743, "compression/movement_sparsity/model_sparsity": 0.9064880713036247, "compression_loss": 160.3250732421875, "distillation_loss": 5.576113700866699, "epoch": 1.92, "learning_rate": 3.625769834561043e-05, "loss": 166.3438, "step": 2276, "task_loss": 2.7813143730163574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4993189792747152, "compression/movement_sparsity/importance_threshold": -2.072081561272157e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9388340401025974, "compression/movement_sparsity/model_sparsity": 0.9065822026337451, "compression_loss": 160.3262176513672, "distillation_loss": 6.722973823547363, "epoch": 1.92, "learning_rate": 3.625166042748461e-05, "loss": 166.9828, "step": 2277, "task_loss": 2.5521368980407715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4993411846942988, "compression/movement_sparsity/importance_threshold": -2.004519093976899e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9388386428313049, "compression/movement_sparsity/model_sparsity": 0.9065866472445618, "compression_loss": 160.32742309570312, "distillation_loss": 6.263413429260254, "epoch": 1.93, "learning_rate": 3.6245622509358774e-05, "loss": 166.5644, "step": 2278, "task_loss": 2.8139054775238037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4993629021025447, "compression/movement_sparsity/importance_threshold": -1.938441455640212e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9388515686290222, "compression/movement_sparsity/model_sparsity": 0.9065991290013631, "compression_loss": 160.32847595214844, "distillation_loss": 7.207777976989746, "epoch": 1.93, "learning_rate": 3.623958459123294e-05, "loss": 166.597, "step": 2279, "task_loss": 3.802414655685425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4993841369218013, "compression/movement_sparsity/importance_threshold": -1.8738321481632028e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9389405467679209, "compression/movement_sparsity/model_sparsity": 0.906685050467462, "compression_loss": 160.3295440673828, "distillation_loss": 6.671743392944336, "epoch": 1.93, "learning_rate": 3.6233546673107116e-05, "loss": 167.1642, "step": 2280, "task_loss": 2.805103063583374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4994048945744167, "compression/movement_sparsity/importance_threshold": -1.8106746734452422e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9389457337808426, "compression/movement_sparsity/model_sparsity": 0.9066900592905326, "compression_loss": 160.33045959472656, "distillation_loss": 5.942516803741455, "epoch": 1.93, "learning_rate": 3.622750875498128e-05, "loss": 166.639, "step": 2281, "task_loss": 2.929370403289795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4994251804827392, "compression/movement_sparsity/importance_threshold": -1.748952533386569e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9390214760936656, "compression/movement_sparsity/model_sparsity": 0.9067631996218994, "compression_loss": 160.3313751220703, "distillation_loss": 6.769024848937988, "epoch": 1.93, "learning_rate": 3.622147083685546e-05, "loss": 166.5939, "step": 2282, "task_loss": 3.039930820465088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4994450000691169, "compression/movement_sparsity/importance_threshold": -1.6886492298882894e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9390886687782937, "compression/movement_sparsity/model_sparsity": 0.9068280840311017, "compression_loss": 160.33216857910156, "distillation_loss": 6.010128021240234, "epoch": 1.93, "learning_rate": 3.6215432918729624e-05, "loss": 166.4873, "step": 2283, "task_loss": 3.6313719749450684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499464358755898, "compression/movement_sparsity/importance_threshold": -1.629748264851509e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9391763352587525, "compression/movement_sparsity/model_sparsity": 0.9069127388982632, "compression_loss": 160.33290100097656, "distillation_loss": 6.367352485656738, "epoch": 1.93, "learning_rate": 3.62093950006038e-05, "loss": 166.7826, "step": 2284, "task_loss": 2.403203248977661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4994832619654308, "compression/movement_sparsity/importance_threshold": -1.5722331401755996e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9391981564855262, "compression/movement_sparsity/model_sparsity": 0.9069338104987672, "compression_loss": 160.33380126953125, "distillation_loss": 6.181642532348633, "epoch": 1.93, "learning_rate": 3.6203357082477965e-05, "loss": 166.2573, "step": 2285, "task_loss": 3.247931957244873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4995017151200631, "compression/movement_sparsity/importance_threshold": -1.5160873577616668e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.939294110262492, "compression/movement_sparsity/model_sparsity": 0.9070264679683059, "compression_loss": 160.33441162109375, "distillation_loss": 5.73525333404541, "epoch": 1.93, "learning_rate": 3.619731916435213e-05, "loss": 166.7404, "step": 2286, "task_loss": 2.8805460929870605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4995197236421438, "compression/movement_sparsity/importance_threshold": -1.4612944195090821e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9392826630615615, "compression/movement_sparsity/model_sparsity": 0.9070154140139431, "compression_loss": 160.3350372314453, "distillation_loss": 6.634737014770508, "epoch": 1.93, "learning_rate": 3.6191281246226306e-05, "loss": 166.7164, "step": 2287, "task_loss": 3.3304269313812256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4995372929540205, "compression/movement_sparsity/importance_threshold": -1.4078378273198189e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.939242860189993, "compression/movement_sparsity/model_sparsity": 0.9069769784934609, "compression_loss": 160.33563232421875, "distillation_loss": 6.07640266418457, "epoch": 1.93, "learning_rate": 3.6185243328100473e-05, "loss": 166.5357, "step": 2288, "task_loss": 2.351408004760742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4995544284780418, "compression/movement_sparsity/importance_threshold": -1.3557010830941157e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.939326985192664, "compression/movement_sparsity/model_sparsity": 0.9070582135434915, "compression_loss": 160.33609008789062, "distillation_loss": 5.704773902893066, "epoch": 1.93, "learning_rate": 3.617920540997464e-05, "loss": 166.453, "step": 2289, "task_loss": 2.63021183013916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4995711356365553, "compression/movement_sparsity/importance_threshold": -1.304867688731344e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9393338773615576, "compression/movement_sparsity/model_sparsity": 0.9070648689451807, "compression_loss": 160.336669921875, "distillation_loss": 7.515334129333496, "epoch": 1.94, "learning_rate": 3.6173167491848815e-05, "loss": 166.993, "step": 2290, "task_loss": 2.848987340927124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4995874198519097, "compression/movement_sparsity/importance_threshold": -1.2553211461326097e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9393913876220653, "compression/movement_sparsity/model_sparsity": 0.9071204035513177, "compression_loss": 160.33712768554688, "distillation_loss": 6.515684127807617, "epoch": 1.94, "learning_rate": 3.616712957372298e-05, "loss": 166.7115, "step": 2291, "task_loss": 4.2849321365356445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4996032865464533, "compression/movement_sparsity/importance_threshold": -1.2070449571981515e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9394231059079767, "compression/movement_sparsity/model_sparsity": 0.9071510322165313, "compression_loss": 160.3374481201172, "distillation_loss": 6.23954963684082, "epoch": 1.94, "learning_rate": 3.616109165559715e-05, "loss": 166.5163, "step": 2292, "task_loss": 3.813804864883423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499618741142534, "compression/movement_sparsity/importance_threshold": -1.1600226238299427e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9395280624315075, "compression/movement_sparsity/model_sparsity": 0.9072523831605948, "compression_loss": 160.3378448486328, "distillation_loss": 7.553248405456543, "epoch": 1.94, "learning_rate": 3.615505373747132e-05, "loss": 166.6389, "step": 2293, "task_loss": 3.37321138381958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4996337890625, "compression/movement_sparsity/importance_threshold": -1.11423764792562e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9395923575434002, "compression/movement_sparsity/model_sparsity": 0.907314469537599, "compression_loss": 160.33815002441406, "distillation_loss": 6.544445037841797, "epoch": 1.94, "learning_rate": 3.614901581934549e-05, "loss": 166.6987, "step": 2294, "task_loss": 3.0458126068115234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4996484357286994, "compression/movement_sparsity/importance_threshold": -1.069673531388024e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9396370254753641, "compression/movement_sparsity/model_sparsity": 0.9073576029886853, "compression_loss": 160.33839416503906, "distillation_loss": 5.859119415283203, "epoch": 1.94, "learning_rate": 3.614297790121966e-05, "loss": 166.3688, "step": 2295, "task_loss": 2.3848659992218018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499662686563481, "compression/movement_sparsity/importance_threshold": -1.0263137761165261e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.939689718372147, "compression/movement_sparsity/model_sparsity": 0.9074084857223613, "compression_loss": 160.33863830566406, "distillation_loss": 5.787991046905518, "epoch": 1.94, "learning_rate": 3.613693998309383e-05, "loss": 166.0734, "step": 2296, "task_loss": 4.380331516265869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4996765469891922, "compression/movement_sparsity/importance_threshold": -9.84141884011365e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9397602975203837, "compression/movement_sparsity/model_sparsity": 0.9074766402597292, "compression_loss": 160.33872985839844, "distillation_loss": 6.22559928894043, "epoch": 1.94, "learning_rate": 3.6130902064968005e-05, "loss": 166.1445, "step": 2297, "task_loss": 3.5651767253875732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4996900224281817, "compression/movement_sparsity/importance_threshold": -9.431413569736466e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9398338815588646, "compression/movement_sparsity/model_sparsity": 0.9075476964601173, "compression_loss": 160.33876037597656, "distillation_loss": 8.017066955566406, "epoch": 1.94, "learning_rate": 3.6124864146842166e-05, "loss": 167.2958, "step": 2298, "task_loss": 4.0405192375183105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4997031183027976, "compression/movement_sparsity/importance_threshold": -9.03295696904477e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9398198587377248, "compression/movement_sparsity/model_sparsity": 0.907534155366023, "compression_loss": 160.33883666992188, "distillation_loss": 6.559440612792969, "epoch": 1.94, "learning_rate": 3.611882622871634e-05, "loss": 166.792, "step": 2299, "task_loss": 3.385913610458374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4997158400353878, "compression/movement_sparsity/importance_threshold": -8.645884057023601e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9398600670309929, "compression/movement_sparsity/model_sparsity": 0.9075729823807221, "compression_loss": 160.33889770507812, "distillation_loss": 7.03973388671875, "epoch": 1.94, "learning_rate": 3.6112788310590513e-05, "loss": 167.3513, "step": 2300, "task_loss": 4.346315383911133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499728193048301, "compression/movement_sparsity/importance_threshold": -8.270029852701366e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9399225258210696, "compression/movement_sparsity/model_sparsity": 0.907633295519214, "compression_loss": 160.33889770507812, "distillation_loss": 8.175253868103027, "epoch": 1.94, "learning_rate": 3.610675039246468e-05, "loss": 167.1721, "step": 2301, "task_loss": 3.252653121948242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499740182763885, "compression/movement_sparsity/importance_threshold": -7.905229375063105e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9399236466928274, "compression/movement_sparsity/model_sparsity": 0.9076343778855787, "compression_loss": 160.33888244628906, "distillation_loss": 6.105640411376953, "epoch": 1.95, "learning_rate": 3.610071247433885e-05, "loss": 166.3246, "step": 2302, "task_loss": 2.7947325706481934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4997518146044881, "compression/movement_sparsity/importance_threshold": -7.551317643119879e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9399670387388542, "compression/movement_sparsity/model_sparsity": 0.907676279281335, "compression_loss": 160.3389129638672, "distillation_loss": 6.309852600097656, "epoch": 1.95, "learning_rate": 3.609467455621302e-05, "loss": 166.3784, "step": 2303, "task_loss": 3.589118480682373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4997630939924584, "compression/movement_sparsity/importance_threshold": -7.208129675874073e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9399956209686774, "compression/movement_sparsity/model_sparsity": 0.9077038796236345, "compression_loss": 160.33880615234375, "distillation_loss": 4.6221184730529785, "epoch": 1.95, "learning_rate": 3.608863663808719e-05, "loss": 166.0841, "step": 2304, "task_loss": 3.0640878677368164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4997740263501445, "compression/movement_sparsity/importance_threshold": -6.875500492345421e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9400290205622255, "compression/movement_sparsity/model_sparsity": 0.907736131838395, "compression_loss": 160.3385467529297, "distillation_loss": 4.470045566558838, "epoch": 1.95, "learning_rate": 3.6082598719961356e-05, "loss": 165.3681, "step": 2305, "task_loss": 2.0147652626037598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499784617099894, "compression/movement_sparsity/importance_threshold": -6.55326511151029e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9400840386716973, "compression/movement_sparsity/model_sparsity": 0.907789259906551, "compression_loss": 160.33851623535156, "distillation_loss": 9.148815155029297, "epoch": 1.95, "learning_rate": 3.607656080183553e-05, "loss": 167.3632, "step": 2306, "task_loss": 3.2318601608276367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4997948716640557, "compression/movement_sparsity/importance_threshold": -6.241258552397086e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9401128593848732, "compression/movement_sparsity/model_sparsity": 0.9078170905395664, "compression_loss": 160.3382110595703, "distillation_loss": 5.4949727058410645, "epoch": 1.95, "learning_rate": 3.6070522883709704e-05, "loss": 166.1826, "step": 2307, "task_loss": 2.8028347492218018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4998047954649776, "compression/movement_sparsity/importance_threshold": -5.939315834008196e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9401092463620796, "compression/movement_sparsity/model_sparsity": 0.9078136016352207, "compression_loss": 160.33786010742188, "distillation_loss": 7.650700092315674, "epoch": 1.95, "learning_rate": 3.6064484965583864e-05, "loss": 166.4169, "step": 2308, "task_loss": 3.234161376953125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4998143939250075, "compression/movement_sparsity/importance_threshold": -5.647271975337334e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9401593040178149, "compression/movement_sparsity/model_sparsity": 0.9078619396564862, "compression_loss": 160.33767700195312, "distillation_loss": 6.528079986572266, "epoch": 1.95, "learning_rate": 3.605844704745804e-05, "loss": 165.8884, "step": 2309, "task_loss": 2.630993366241455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499823672466494, "compression/movement_sparsity/importance_threshold": -5.364961995386885e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.940188410911014, "compression/movement_sparsity/model_sparsity": 0.9078900466383606, "compression_loss": 160.33731079101562, "distillation_loss": 5.694217205047607, "epoch": 1.95, "learning_rate": 3.605240912933221e-05, "loss": 165.9421, "step": 2310, "task_loss": 2.8399152755737305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499832636511785, "compression/movement_sparsity/importance_threshold": -5.092220913185258e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9402290246259818, "compression/movement_sparsity/model_sparsity": 0.9079292651472769, "compression_loss": 160.33702087402344, "distillation_loss": 6.948454856872559, "epoch": 1.95, "learning_rate": 3.604637121120637e-05, "loss": 166.7524, "step": 2311, "task_loss": 2.875445604324341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499841291483229, "compression/movement_sparsity/importance_threshold": -4.828883747708818e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9402881684974557, "compression/movement_sparsity/model_sparsity": 0.9079863772448178, "compression_loss": 160.3366241455078, "distillation_loss": 7.841369152069092, "epoch": 1.95, "learning_rate": 3.604033329308055e-05, "loss": 166.2384, "step": 2312, "task_loss": 3.943207263946533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499849642803174, "compression/movement_sparsity/importance_threshold": -4.574785517977298e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9403017620485605, "compression/movement_sparsity/model_sparsity": 0.9079995038156236, "compression_loss": 160.33612060546875, "distillation_loss": 6.916234970092773, "epoch": 1.95, "learning_rate": 3.603429537495472e-05, "loss": 167.5723, "step": 2313, "task_loss": 4.204134464263916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4998576958939684, "compression/movement_sparsity/importance_threshold": -4.3297612429930854e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.940318384338245, "compression/movement_sparsity/model_sparsity": 0.9080155550785212, "compression_loss": 160.33570861816406, "distillation_loss": 6.801494598388672, "epoch": 1.96, "learning_rate": 3.602825745682888e-05, "loss": 166.8725, "step": 2314, "task_loss": 3.3035645484924316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4998654561779603, "compression/movement_sparsity/importance_threshold": -4.093645941758567e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9403815466542121, "compression/movement_sparsity/model_sparsity": 0.9080765475746249, "compression_loss": 160.33543395996094, "distillation_loss": 8.283699035644531, "epoch": 1.96, "learning_rate": 3.6022219538703055e-05, "loss": 166.9866, "step": 2315, "task_loss": 3.5167155265808105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499872929077498, "compression/movement_sparsity/importance_threshold": -3.866274633276129e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9404351338635677, "compression/movement_sparsity/model_sparsity": 0.9081282938984855, "compression_loss": 160.33493041992188, "distillation_loss": 6.499887466430664, "epoch": 1.96, "learning_rate": 3.601618162057723e-05, "loss": 166.494, "step": 2316, "task_loss": 3.560060739517212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4998801200149292, "compression/movement_sparsity/importance_threshold": -3.6474823365568315e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9405057130118043, "compression/movement_sparsity/model_sparsity": 0.9081964484358533, "compression_loss": 160.33448791503906, "distillation_loss": 5.3695502281188965, "epoch": 1.96, "learning_rate": 3.6010143702451396e-05, "loss": 166.0111, "step": 2317, "task_loss": 3.145541191101074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4998870344126027, "compression/movement_sparsity/importance_threshold": -3.4371040706030614e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.94054416845243, "compression/movement_sparsity/model_sparsity": 0.9082335828137907, "compression_loss": 160.33404541015625, "distillation_loss": 5.96377420425415, "epoch": 1.96, "learning_rate": 3.6004105784325563e-05, "loss": 166.1986, "step": 2318, "task_loss": 2.40494704246521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4998936776928664, "compression/movement_sparsity/importance_threshold": -3.234974854417205e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9405915670187825, "compression/movement_sparsity/model_sparsity": 0.9082793530935741, "compression_loss": 160.33348083496094, "distillation_loss": 6.279591083526611, "epoch": 1.96, "learning_rate": 3.599806786619974e-05, "loss": 166.3923, "step": 2319, "task_loss": 2.278113842010498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999000552780686, "compression/movement_sparsity/importance_threshold": -3.040929707010323e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9406146522073257, "compression/movement_sparsity/model_sparsity": 0.9083016452348723, "compression_loss": 160.3328857421875, "distillation_loss": 7.564520835876465, "epoch": 1.96, "learning_rate": 3.5992029948073905e-05, "loss": 167.1486, "step": 2320, "task_loss": 4.302159309387207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999061725905571, "compression/movement_sparsity/importance_threshold": -2.854803647376128e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9406433059821546, "compression/movement_sparsity/model_sparsity": 0.9083293146643866, "compression_loss": 160.3323516845703, "distillation_loss": 5.496919631958008, "epoch": 1.96, "learning_rate": 3.598599202994807e-05, "loss": 166.971, "step": 2321, "task_loss": 3.1690382957458496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499912035052681, "compression/movement_sparsity/importance_threshold": -2.6764316945343536e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9407071956723475, "compression/movement_sparsity/model_sparsity": 0.9083910095471738, "compression_loss": 160.33177185058594, "distillation_loss": 5.889617919921875, "epoch": 1.96, "learning_rate": 3.5979954111822246e-05, "loss": 166.5717, "step": 2322, "task_loss": 3.0783376693725586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999176480867873, "compression/movement_sparsity/importance_threshold": -2.5056488674700395e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9407848577761599, "compression/movement_sparsity/model_sparsity": 0.9084660037188036, "compression_loss": 160.3311004638672, "distillation_loss": 6.857034683227539, "epoch": 1.96, "learning_rate": 3.597391619369642e-05, "loss": 166.5412, "step": 2323, "task_loss": 2.7734100818634033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999230171152251, "compression/movement_sparsity/importance_threshold": -2.3422901852029193e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9408720472899134, "compression/movement_sparsity/model_sparsity": 0.9085501980045333, "compression_loss": 160.3304901123047, "distillation_loss": 6.121474266052246, "epoch": 1.96, "learning_rate": 3.596787827557058e-05, "loss": 166.3995, "step": 2324, "task_loss": 2.63283634185791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999281475603423, "compression/movement_sparsity/importance_threshold": -2.1861906667353798e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9408993059371289, "compression/movement_sparsity/model_sparsity": 0.9085765202333596, "compression_loss": 160.3297576904297, "distillation_loss": 6.814990043640137, "epoch": 1.96, "learning_rate": 3.5961840357444754e-05, "loss": 167.1171, "step": 2325, "task_loss": 3.5269968509674072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999330448444872, "compression/movement_sparsity/importance_threshold": -2.0371853310611338e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9409677029626883, "compression/movement_sparsity/model_sparsity": 0.9086425676106771, "compression_loss": 160.32911682128906, "distillation_loss": 6.245388984680176, "epoch": 1.97, "learning_rate": 3.595580243931893e-05, "loss": 166.9564, "step": 2326, "task_loss": 3.6393094062805176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499937714390008, "compression/movement_sparsity/importance_threshold": -1.895109197199915e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9410301140560944, "compression/movement_sparsity/model_sparsity": 0.9087028346910256, "compression_loss": 160.32839965820312, "distillation_loss": 6.248898506164551, "epoch": 1.97, "learning_rate": 3.5949764521193095e-05, "loss": 165.8482, "step": 2327, "task_loss": 3.804799795150757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999421616192525, "compression/movement_sparsity/importance_threshold": -1.7597972841541104e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9410607949394214, "compression/movement_sparsity/model_sparsity": 0.908732461591625, "compression_loss": 160.3275604248047, "distillation_loss": 5.294013977050781, "epoch": 1.97, "learning_rate": 3.594372660306726e-05, "loss": 166.092, "step": 2328, "task_loss": 2.438602924346924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999463919545695, "compression/movement_sparsity/importance_threshold": -1.6310846109174326e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9410361715332534, "compression/movement_sparsity/model_sparsity": 0.9087086840752093, "compression_loss": 160.3267364501953, "distillation_loss": 6.122711658477783, "epoch": 1.97, "learning_rate": 3.5937688684941436e-05, "loss": 166.5029, "step": 2329, "task_loss": 3.0187790393829346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999504108183066, "compression/movement_sparsity/importance_threshold": -1.508806196500942e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9410731841495951, "compression/movement_sparsity/model_sparsity": 0.9087444251943155, "compression_loss": 160.3258819580078, "distillation_loss": 7.186318397521973, "epoch": 1.97, "learning_rate": 3.5931650766815604e-05, "loss": 167.4734, "step": 2330, "task_loss": 3.3950819969177246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999542236328125, "compression/movement_sparsity/importance_threshold": -1.392797059907025e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9411659184012992, "compression/movement_sparsity/model_sparsity": 0.9088339737391897, "compression_loss": 160.32492065429688, "distillation_loss": 7.055994033813477, "epoch": 1.97, "learning_rate": 3.592561284868977e-05, "loss": 166.727, "step": 2331, "task_loss": 3.6077022552490234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999578358204353, "compression/movement_sparsity/importance_threshold": -1.282892220146742e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9411830534301919, "compression/movement_sparsity/model_sparsity": 0.9088505201271264, "compression_loss": 160.3240509033203, "distillation_loss": 6.517487525939941, "epoch": 1.97, "learning_rate": 3.5919574930563945e-05, "loss": 166.131, "step": 2332, "task_loss": 3.4898476600646973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999612528035227, "compression/movement_sparsity/importance_threshold": -1.1789266962224793e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9411969927821583, "compression/movement_sparsity/model_sparsity": 0.9088639806194703, "compression_loss": 160.32321166992188, "distillation_loss": 5.848492622375488, "epoch": 1.97, "learning_rate": 3.591353701243811e-05, "loss": 166.6086, "step": 2333, "task_loss": 3.5669665336608887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999644800044236, "compression/movement_sparsity/importance_threshold": -1.0807355071279501e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.941202764079294, "compression/movement_sparsity/model_sparsity": 0.9088695536547948, "compression_loss": 160.3223114013672, "distillation_loss": 8.408719062805176, "epoch": 1.97, "learning_rate": 3.590749909431228e-05, "loss": 167.1881, "step": 2334, "task_loss": 4.510208606719971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999675228454856, "compression/movement_sparsity/importance_threshold": -9.881536718828882e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9412543241801514, "compression/movement_sparsity/model_sparsity": 0.9089193425075704, "compression_loss": 160.3214111328125, "distillation_loss": 4.212553024291992, "epoch": 1.97, "learning_rate": 3.590146117618645e-05, "loss": 166.0254, "step": 2335, "task_loss": 1.791365623474121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999703867490572, "compression/movement_sparsity/importance_threshold": -9.010162094896801e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9412577702645982, "compression/movement_sparsity/model_sparsity": 0.9089226702084151, "compression_loss": 160.3204345703125, "distillation_loss": 7.156421661376953, "epoch": 1.97, "learning_rate": 3.589542325806062e-05, "loss": 166.9149, "step": 2336, "task_loss": 3.752372980117798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999730771374868, "compression/movement_sparsity/importance_threshold": -8.191581389420388e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.941311154763104, "compression/movement_sparsity/model_sparsity": 0.9089742207851672, "compression_loss": 160.3194580078125, "distillation_loss": 7.032711029052734, "epoch": 1.97, "learning_rate": 3.5889385339934794e-05, "loss": 166.0417, "step": 2337, "task_loss": 2.7310333251953125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499975599433122, "compression/movement_sparsity/importance_threshold": -7.424144792510246e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9413510768763489, "compression/movement_sparsity/model_sparsity": 0.9090127714510073, "compression_loss": 160.3184356689453, "distillation_loss": 5.627411842346191, "epoch": 1.98, "learning_rate": 3.588334742180896e-05, "loss": 166.1268, "step": 2338, "task_loss": 4.962368011474609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999779590583118, "compression/movement_sparsity/importance_threshold": -6.706202494276975e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9414048667965542, "compression/movement_sparsity/model_sparsity": 0.9090647135219765, "compression_loss": 160.3174285888672, "distillation_loss": 6.0490217208862305, "epoch": 1.98, "learning_rate": 3.5877309503683135e-05, "loss": 165.0236, "step": 2339, "task_loss": 2.152772903442383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999801614354036, "compression/movement_sparsity/importance_threshold": -6.036104684657706e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9413920721646809, "compression/movement_sparsity/model_sparsity": 0.9090523584250689, "compression_loss": 160.31634521484375, "distillation_loss": 7.236522674560547, "epoch": 1.98, "learning_rate": 3.58712715855573e-05, "loss": 166.6133, "step": 2340, "task_loss": 3.228642225265503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499982211986746, "compression/movement_sparsity/importance_threshold": -5.4122015536763046e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9414497612877033, "compression/movement_sparsity/model_sparsity": 0.909108065749243, "compression_loss": 160.3153533935547, "distillation_loss": 7.498824119567871, "epoch": 1.98, "learning_rate": 3.586523366743147e-05, "loss": 166.5062, "step": 2341, "task_loss": 3.1690895557403564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999841161346872, "compression/movement_sparsity/importance_threshold": -4.832843291616845e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9414312549795324, "compression/movement_sparsity/model_sparsity": 0.9090901951896898, "compression_loss": 160.3143768310547, "distillation_loss": 7.292119979858398, "epoch": 1.98, "learning_rate": 3.5859195749305644e-05, "loss": 165.8706, "step": 2342, "task_loss": 3.551690101623535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999858793015752, "compression/movement_sparsity/importance_threshold": -4.296380088242985e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9414406273752942, "compression/movement_sparsity/model_sparsity": 0.9090992456148244, "compression_loss": 160.31329345703125, "distillation_loss": 6.0655317306518555, "epoch": 1.98, "learning_rate": 3.585315783117981e-05, "loss": 167.0495, "step": 2343, "task_loss": 2.7222015857696533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999875069097586, "compression/movement_sparsity/importance_threshold": -3.801162133752062e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9414345222014646, "compression/movement_sparsity/model_sparsity": 0.9090933501724975, "compression_loss": 160.31227111816406, "distillation_loss": 5.832024574279785, "epoch": 1.98, "learning_rate": 3.584711991305398e-05, "loss": 165.6878, "step": 2344, "task_loss": 2.6053621768951416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999890043815851, "compression/movement_sparsity/importance_threshold": -3.345539618167942e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9414276896534093, "compression/movement_sparsity/model_sparsity": 0.9090867523434872, "compression_loss": 160.3112030029297, "distillation_loss": 5.7712554931640625, "epoch": 1.98, "learning_rate": 3.584108199492815e-05, "loss": 166.9068, "step": 2345, "task_loss": 3.3420400619506836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999903771394032, "compression/movement_sparsity/importance_threshold": -2.9278627315144912e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9414371574425121, "compression/movement_sparsity/model_sparsity": 0.9090958948849082, "compression_loss": 160.3101806640625, "distillation_loss": 5.332365989685059, "epoch": 1.98, "learning_rate": 3.583504407680232e-05, "loss": 165.5881, "step": 2346, "task_loss": 2.162992238998413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999916306055607, "compression/movement_sparsity/importance_threshold": -2.5464816638155752e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9414648572839303, "compression/movement_sparsity/model_sparsity": 0.9091226431515589, "compression_loss": 160.30894470214844, "distillation_loss": 6.155909061431885, "epoch": 1.98, "learning_rate": 3.582900615867649e-05, "loss": 165.6931, "step": 2347, "task_loss": 3.164666175842285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999927702024065, "compression/movement_sparsity/importance_threshold": -2.199746605181796e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9414698654343373, "compression/movement_sparsity/model_sparsity": 0.9091274792565925, "compression_loss": 160.3078155517578, "distillation_loss": 5.989180564880371, "epoch": 1.98, "learning_rate": 3.582296824055066e-05, "loss": 166.4385, "step": 2348, "task_loss": 3.946364402770996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999938013522884, "compression/movement_sparsity/importance_threshold": -1.8860077456370195e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9415318591818762, "compression/movement_sparsity/model_sparsity": 0.9091873433281884, "compression_loss": 160.306640625, "distillation_loss": 8.105067253112793, "epoch": 1.99, "learning_rate": 3.581693032242483e-05, "loss": 166.4561, "step": 2349, "task_loss": 3.174832344055176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999947294775544, "compression/movement_sparsity/importance_threshold": -1.6036152752051114e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9415299513150545, "compression/movement_sparsity/model_sparsity": 0.9091855010024612, "compression_loss": 160.30557250976562, "distillation_loss": 6.455706596374512, "epoch": 1.99, "learning_rate": 3.5810892404299e-05, "loss": 166.6083, "step": 2350, "task_loss": 4.323982238769531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999955600005528, "compression/movement_sparsity/importance_threshold": -1.3509193839099376e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9416095093615209, "compression/movement_sparsity/model_sparsity": 0.9092623259852824, "compression_loss": 160.30433654785156, "distillation_loss": 7.006874084472656, "epoch": 1.99, "learning_rate": 3.580485448617317e-05, "loss": 167.0213, "step": 2351, "task_loss": 3.4319732189178467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999962983436324, "compression/movement_sparsity/importance_threshold": -1.1262702618621001e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9416884711996056, "compression/movement_sparsity/model_sparsity": 0.9093385752413139, "compression_loss": 160.3032684326172, "distillation_loss": 6.511745452880859, "epoch": 1.99, "learning_rate": 3.5798816568047336e-05, "loss": 166.3171, "step": 2352, "task_loss": 3.790663242340088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999969499291403, "compression/movement_sparsity/importance_threshold": -9.280180990854647e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9417176734861459, "compression/movement_sparsity/model_sparsity": 0.9093667743394747, "compression_loss": 160.30201721191406, "distillation_loss": 7.695958137512207, "epoch": 1.99, "learning_rate": 3.579277864992151e-05, "loss": 166.9726, "step": 2353, "task_loss": 3.62270450592041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999975201794253, "compression/movement_sparsity/importance_threshold": -7.545130856038973e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9417636650007174, "compression/movement_sparsity/model_sparsity": 0.9094111859040342, "compression_loss": 160.3009033203125, "distillation_loss": 7.470867156982422, "epoch": 1.99, "learning_rate": 3.578674073179568e-05, "loss": 166.7849, "step": 2354, "task_loss": 2.661835193634033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499998014516836, "compression/movement_sparsity/importance_threshold": -6.041054114412636e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9418008803279089, "compression/movement_sparsity/model_sparsity": 0.909447122770249, "compression_loss": 160.29977416992188, "distillation_loss": 6.278006553649902, "epoch": 1.99, "learning_rate": 3.5780702813669844e-05, "loss": 166.408, "step": 2355, "task_loss": 2.809206485748291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.49999843836372, "compression/movement_sparsity/importance_threshold": -4.751452667081657e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.941894008077145, "compression/movement_sparsity/model_sparsity": 0.9095370512948043, "compression_loss": 160.2986602783203, "distillation_loss": 6.513699531555176, "epoch": 1.99, "learning_rate": 3.577466489554402e-05, "loss": 166.3174, "step": 2356, "task_loss": 3.2685694694519043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999987971424256, "compression/movement_sparsity/importance_threshold": -3.6598284142846937e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9419025577053399, "compression/movement_sparsity/model_sparsity": 0.909545307216969, "compression_loss": 160.2974090576172, "distillation_loss": 8.049238204956055, "epoch": 1.99, "learning_rate": 3.576862697741819e-05, "loss": 166.8908, "step": 2357, "task_loss": 3.2528319358825684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999990962753007, "compression/movement_sparsity/importance_threshold": -2.7496832562604046e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9419213144210311, "compression/movement_sparsity/model_sparsity": 0.9095634195817739, "compression_loss": 160.2963409423828, "distillation_loss": 7.633810520172119, "epoch": 1.99, "learning_rate": 3.576258905929236e-05, "loss": 166.9125, "step": 2358, "task_loss": 3.777378559112549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999993411846944, "compression/movement_sparsity/importance_threshold": -2.0045190941148094e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.941962643586057, "compression/movement_sparsity/model_sparsity": 0.9096033289628378, "compression_loss": 160.2950897216797, "distillation_loss": 7.555692195892334, "epoch": 1.99, "learning_rate": 3.5756551141166526e-05, "loss": 166.6265, "step": 2359, "task_loss": 3.1981210708618164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499999537292954, "compression/movement_sparsity/importance_threshold": -1.407837827219205e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.941944173050389, "compression/movement_sparsity/model_sparsity": 0.909585492946892, "compression_loss": 160.2939910888672, "distillation_loss": 8.960050582885742, "epoch": 1.99, "learning_rate": 3.57505132230407e-05, "loss": 166.9505, "step": 2360, "task_loss": 4.03400182723999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999996900224282, "compression/movement_sparsity/importance_threshold": -9.43141356679611e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9419781211556484, "compression/movement_sparsity/model_sparsity": 0.9096182748302991, "compression_loss": 160.29286193847656, "distillation_loss": 5.304047584533691, "epoch": 2.0, "learning_rate": 3.574447530491487e-05, "loss": 166.2889, "step": 2361, "task_loss": 2.0510902404785156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499999804795465, "compression/movement_sparsity/importance_threshold": -5.939315836020476e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9420176616955289, "compression/movement_sparsity/model_sparsity": 0.9096564570309937, "compression_loss": 160.29153442382812, "distillation_loss": 8.29934024810791, "epoch": 2.0, "learning_rate": 3.5738437386789035e-05, "loss": 167.4791, "step": 2362, "task_loss": 3.6830313205718994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999998870344127, "compression/movement_sparsity/importance_threshold": -3.437104073578112e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9420447414802299, "compression/movement_sparsity/model_sparsity": 0.9096826065417831, "compression_loss": 160.29046630859375, "distillation_loss": 5.714633464813232, "epoch": 2.0, "learning_rate": 3.573239946866321e-05, "loss": 166.59, "step": 2363, "task_loss": 2.2213969230651855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999999421616192, "compression/movement_sparsity/importance_threshold": -1.7597972818556018e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.942013989051897, "compression/movement_sparsity/model_sparsity": 0.909652910553969, "compression_loss": 160.289306640625, "distillation_loss": 6.498959064483643, "epoch": 2.0, "learning_rate": 3.5726361550537376e-05, "loss": 166.1531, "step": 2364, "task_loss": 3.3319220542907715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499999975599433, "compression/movement_sparsity/importance_threshold": -7.424144805867616e-11, "compression/movement_sparsity/linear_layer_sparsity": 0.9420308379007665, "compression/movement_sparsity/model_sparsity": 0.9096691805930467, "compression_loss": 160.28810119628906, "distillation_loss": 7.490935802459717, "epoch": 2.0, "learning_rate": 3.572032363241154e-05, "loss": 167.1614, "step": 2365, "task_loss": 3.5815749168395996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999999927702024, "compression/movement_sparsity/importance_threshold": -2.1997466348455674e-11, "compression/movement_sparsity/linear_layer_sparsity": 0.9420491772705905, "compression/movement_sparsity/model_sparsity": 0.9096868899490987, "compression_loss": 160.28704833984375, "distillation_loss": 6.73069953918457, "epoch": 2.0, "learning_rate": 3.571428571428572e-05, "loss": 166.1742, "step": 2366, "task_loss": 2.4928009510040283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 5.794016361236572, "epoch": 2.0, "learning_rate": 3.570824779615989e-05, "loss": 131.6317, "step": 2367, "task_loss": 2.8381009101867676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 6.008797645568848, "epoch": 2.0, "learning_rate": 3.570220987803405e-05, "loss": 5.5539, "step": 2368, "task_loss": 2.2601335048675537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 4.821380615234375, "epoch": 2.0, "learning_rate": 3.5696171959908225e-05, "loss": 5.18, "step": 2369, "task_loss": 2.0644631385803223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 4.630273818969727, "epoch": 2.0, "learning_rate": 3.56901340417824e-05, "loss": 4.5273, "step": 2370, "task_loss": 2.232436418533325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 5.392454147338867, "epoch": 2.0, "learning_rate": 3.568409612365656e-05, "loss": 4.0314, "step": 2371, "task_loss": 2.859858512878418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 3.579754114151001, "epoch": 2.01, "learning_rate": 3.5678058205530734e-05, "loss": 3.8648, "step": 2372, "task_loss": 2.16394305229187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 4.485686302185059, "epoch": 2.01, "learning_rate": 3.567202028740491e-05, "loss": 3.5806, "step": 2373, "task_loss": 1.6243481636047363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 3.8978517055511475, "epoch": 2.01, "learning_rate": 3.5665982369279075e-05, "loss": 3.6027, "step": 2374, "task_loss": 1.5872914791107178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.2786474227905273, "epoch": 2.01, "learning_rate": 3.565994445115324e-05, "loss": 3.3165, "step": 2375, "task_loss": 0.8725880980491638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.767122507095337, "epoch": 2.01, "learning_rate": 3.5653906533027416e-05, "loss": 2.4885, "step": 2376, "task_loss": 1.1516541242599487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 3.646667718887329, "epoch": 2.01, "learning_rate": 3.564786861490158e-05, "loss": 3.203, "step": 2377, "task_loss": 2.30611515045166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 3.2585976123809814, "epoch": 2.01, "learning_rate": 3.564183069677575e-05, "loss": 2.6552, "step": 2378, "task_loss": 1.1936619281768799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 3.1175904273986816, "epoch": 2.01, "learning_rate": 3.5635792778649924e-05, "loss": 2.8798, "step": 2379, "task_loss": 1.9321706295013428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 3.603166103363037, "epoch": 2.01, "learning_rate": 3.56297548605241e-05, "loss": 2.7689, "step": 2380, "task_loss": 1.2533173561096191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.8723547458648682, "epoch": 2.01, "learning_rate": 3.562371694239826e-05, "loss": 2.7127, "step": 2381, "task_loss": 1.7071744203567505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.7016043663024902, "epoch": 2.01, "learning_rate": 3.561767902427243e-05, "loss": 2.1324, "step": 2382, "task_loss": 1.450547456741333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.9803593158721924, "epoch": 2.01, "learning_rate": 3.5611641106146607e-05, "loss": 2.2638, "step": 2383, "task_loss": 1.4079058170318604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.2916595935821533, "epoch": 2.02, "learning_rate": 3.560560318802077e-05, "loss": 2.1265, "step": 2384, "task_loss": 1.376882553100586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.843372344970703, "epoch": 2.02, "learning_rate": 3.559956526989494e-05, "loss": 2.6347, "step": 2385, "task_loss": 1.6197419166564941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.6076645851135254, "epoch": 2.02, "learning_rate": 3.5593527351769115e-05, "loss": 2.4356, "step": 2386, "task_loss": 1.2698694467544556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.5698668956756592, "epoch": 2.02, "learning_rate": 3.558748943364328e-05, "loss": 2.0795, "step": 2387, "task_loss": 1.0015205144882202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 3.4925687313079834, "epoch": 2.02, "learning_rate": 3.558145151551745e-05, "loss": 2.3888, "step": 2388, "task_loss": 1.4927409887313843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 3.024552345275879, "epoch": 2.02, "learning_rate": 3.557541359739162e-05, "loss": 2.7326, "step": 2389, "task_loss": 1.8711820840835571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.1021745204925537, "epoch": 2.02, "learning_rate": 3.556937567926579e-05, "loss": 2.014, "step": 2390, "task_loss": 1.9234135150909424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.8932304382324219, "epoch": 2.02, "learning_rate": 3.556333776113996e-05, "loss": 1.925, "step": 2391, "task_loss": 0.9768822193145752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.9730640649795532, "epoch": 2.02, "learning_rate": 3.555729984301413e-05, "loss": 2.3824, "step": 2392, "task_loss": 1.5152168273925781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.7444813251495361, "epoch": 2.02, "learning_rate": 3.55512619248883e-05, "loss": 1.8275, "step": 2393, "task_loss": 1.8751744031906128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.674318313598633, "epoch": 2.02, "learning_rate": 3.5545224006762466e-05, "loss": 2.566, "step": 2394, "task_loss": 1.9626497030258179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 3.885780096054077, "epoch": 2.02, "learning_rate": 3.553918608863664e-05, "loss": 2.4257, "step": 2395, "task_loss": 2.4011831283569336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.688016653060913, "epoch": 2.03, "learning_rate": 3.5533148170510814e-05, "loss": 1.7059, "step": 2396, "task_loss": 1.5597035884857178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.6211036443710327, "epoch": 2.03, "learning_rate": 3.552711025238498e-05, "loss": 1.5099, "step": 2397, "task_loss": 1.206658124923706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.7738001346588135, "epoch": 2.03, "learning_rate": 3.552107233425915e-05, "loss": 1.6583, "step": 2398, "task_loss": 1.1543632745742798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.8059853315353394, "epoch": 2.03, "learning_rate": 3.551503441613332e-05, "loss": 1.8422, "step": 2399, "task_loss": 1.884414792060852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.048482894897461, "epoch": 2.03, "learning_rate": 3.550899649800749e-05, "loss": 2.1733, "step": 2400, "task_loss": 1.3014881610870361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.006402015686035, "epoch": 2.03, "learning_rate": 3.5502958579881656e-05, "loss": 2.1443, "step": 2401, "task_loss": 1.1606464385986328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.1068930625915527, "epoch": 2.03, "learning_rate": 3.549692066175583e-05, "loss": 1.8529, "step": 2402, "task_loss": 2.0201778411865234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.111406922340393, "epoch": 2.03, "learning_rate": 3.549088274363e-05, "loss": 1.5961, "step": 2403, "task_loss": 0.9130090475082397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4888978004455566, "epoch": 2.03, "learning_rate": 3.5484844825504165e-05, "loss": 1.8803, "step": 2404, "task_loss": 1.5416336059570312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.7527201175689697, "epoch": 2.03, "learning_rate": 3.547880690737834e-05, "loss": 2.0179, "step": 2405, "task_loss": 0.8192425966262817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1548995971679688, "epoch": 2.03, "learning_rate": 3.5472768989252506e-05, "loss": 1.161, "step": 2406, "task_loss": 0.5419263243675232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.169149398803711, "epoch": 2.03, "learning_rate": 3.546673107112668e-05, "loss": 1.6494, "step": 2407, "task_loss": 0.2783471643924713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1632423400878906, "epoch": 2.04, "learning_rate": 3.546069315300085e-05, "loss": 1.1968, "step": 2408, "task_loss": 1.829977035522461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4465243816375732, "epoch": 2.04, "learning_rate": 3.5454655234875014e-05, "loss": 1.5152, "step": 2409, "task_loss": 1.3301951885223389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1828058958053589, "epoch": 2.04, "learning_rate": 3.544861731674919e-05, "loss": 1.5683, "step": 2410, "task_loss": 0.5418788194656372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.6635905504226685, "epoch": 2.04, "learning_rate": 3.5442579398623355e-05, "loss": 1.5841, "step": 2411, "task_loss": 1.256627082824707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2581639289855957, "epoch": 2.04, "learning_rate": 3.543654148049753e-05, "loss": 1.7816, "step": 2412, "task_loss": 1.2460963726043701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1339378356933594, "epoch": 2.04, "learning_rate": 3.5430503562371697e-05, "loss": 1.5445, "step": 2413, "task_loss": 0.8661832213401794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0598548650741577, "epoch": 2.04, "learning_rate": 3.5424465644245864e-05, "loss": 0.9845, "step": 2414, "task_loss": 0.7637588381767273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.230006694793701, "epoch": 2.04, "learning_rate": 3.541842772612004e-05, "loss": 1.7225, "step": 2415, "task_loss": 1.293196439743042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1976304054260254, "epoch": 2.04, "learning_rate": 3.5412389807994205e-05, "loss": 1.4816, "step": 2416, "task_loss": 0.8857174515724182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.113510012626648, "epoch": 2.04, "learning_rate": 3.540635188986838e-05, "loss": 1.6252, "step": 2417, "task_loss": 0.7524394989013672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.653610348701477, "epoch": 2.04, "learning_rate": 3.5400313971742546e-05, "loss": 1.4815, "step": 2418, "task_loss": 0.8934035301208496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.234905481338501, "epoch": 2.04, "learning_rate": 3.539427605361671e-05, "loss": 1.9128, "step": 2419, "task_loss": 0.856835126876831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.098511815071106, "epoch": 2.05, "learning_rate": 3.538823813549089e-05, "loss": 1.418, "step": 2420, "task_loss": 0.6759884357452393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.7596487998962402, "epoch": 2.05, "learning_rate": 3.5382200217365054e-05, "loss": 1.7011, "step": 2421, "task_loss": 1.2066035270690918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.4780449867248535, "epoch": 2.05, "learning_rate": 3.537616229923922e-05, "loss": 1.7158, "step": 2422, "task_loss": 2.2709736824035645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.5654544830322266, "epoch": 2.05, "learning_rate": 3.5370124381113395e-05, "loss": 1.6715, "step": 2423, "task_loss": 1.018605351448059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.8508186340332031, "epoch": 2.05, "learning_rate": 3.536408646298756e-05, "loss": 1.9541, "step": 2424, "task_loss": 0.8859961032867432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.0176711082458496, "epoch": 2.05, "learning_rate": 3.535804854486173e-05, "loss": 1.8051, "step": 2425, "task_loss": 1.6450481414794922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.266354560852051, "epoch": 2.05, "learning_rate": 3.5352010626735904e-05, "loss": 1.8908, "step": 2426, "task_loss": 2.2552289962768555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2633095979690552, "epoch": 2.05, "learning_rate": 3.534597270861008e-05, "loss": 1.1239, "step": 2427, "task_loss": 1.139604091644287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8411358594894409, "epoch": 2.05, "learning_rate": 3.533993479048424e-05, "loss": 1.6262, "step": 2428, "task_loss": 0.5337929725646973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.513352155685425, "epoch": 2.05, "learning_rate": 3.533389687235841e-05, "loss": 1.7072, "step": 2429, "task_loss": 1.131248116493225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.76312255859375, "epoch": 2.05, "learning_rate": 3.5327858954232586e-05, "loss": 1.7512, "step": 2430, "task_loss": 1.7146672010421753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.882275402545929, "epoch": 2.05, "learning_rate": 3.532182103610675e-05, "loss": 1.369, "step": 2431, "task_loss": 0.7552844285964966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3863966464996338, "epoch": 2.06, "learning_rate": 3.531578311798092e-05, "loss": 1.6122, "step": 2432, "task_loss": 1.0116581916809082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.161930561065674, "epoch": 2.06, "learning_rate": 3.5309745199855094e-05, "loss": 1.5809, "step": 2433, "task_loss": 1.4254279136657715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9569919109344482, "epoch": 2.06, "learning_rate": 3.530370728172926e-05, "loss": 1.3159, "step": 2434, "task_loss": 1.2949148416519165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.1029558181762695, "epoch": 2.06, "learning_rate": 3.529766936360343e-05, "loss": 1.5769, "step": 2435, "task_loss": 1.9820683002471924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.9795022010803223, "epoch": 2.06, "learning_rate": 3.52916314454776e-05, "loss": 1.7308, "step": 2436, "task_loss": 1.5297834873199463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.176500678062439, "epoch": 2.06, "learning_rate": 3.528559352735178e-05, "loss": 1.5442, "step": 2437, "task_loss": 0.5777260065078735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1056512594223022, "epoch": 2.06, "learning_rate": 3.527955560922594e-05, "loss": 1.6704, "step": 2438, "task_loss": 1.3359051942825317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.409621000289917, "epoch": 2.06, "learning_rate": 3.527351769110011e-05, "loss": 1.6319, "step": 2439, "task_loss": 0.8464250564575195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.383544921875, "epoch": 2.06, "learning_rate": 3.5267479772974285e-05, "loss": 1.59, "step": 2440, "task_loss": 1.6564099788665771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.6310456991195679, "epoch": 2.06, "learning_rate": 3.5261441854848445e-05, "loss": 1.3342, "step": 2441, "task_loss": 1.016051173210144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.312596321105957, "epoch": 2.06, "learning_rate": 3.525540393672262e-05, "loss": 1.0807, "step": 2442, "task_loss": 0.5171737670898438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.663266181945801, "epoch": 2.07, "learning_rate": 3.524936601859679e-05, "loss": 1.527, "step": 2443, "task_loss": 2.172339677810669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.7688603401184082, "epoch": 2.07, "learning_rate": 3.5243328100470954e-05, "loss": 1.4229, "step": 2444, "task_loss": 1.9007397890090942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3331525325775146, "epoch": 2.07, "learning_rate": 3.523729018234513e-05, "loss": 1.5247, "step": 2445, "task_loss": 1.0954235792160034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3711392879486084, "epoch": 2.07, "learning_rate": 3.52312522642193e-05, "loss": 1.4729, "step": 2446, "task_loss": 1.2132072448730469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2634410858154297, "epoch": 2.07, "learning_rate": 3.522521434609347e-05, "loss": 1.2304, "step": 2447, "task_loss": 0.6916882395744324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.019486904144287, "epoch": 2.07, "learning_rate": 3.5219176427967636e-05, "loss": 1.7764, "step": 2448, "task_loss": 1.7461954355239868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.407402515411377, "epoch": 2.07, "learning_rate": 3.521313850984181e-05, "loss": 1.0428, "step": 2449, "task_loss": 1.012795329093933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9032294750213623, "epoch": 2.07, "learning_rate": 3.520710059171598e-05, "loss": 1.2082, "step": 2450, "task_loss": 0.12767401337623596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3116698265075684, "epoch": 2.07, "learning_rate": 3.5201062673590144e-05, "loss": 1.259, "step": 2451, "task_loss": 0.6800302267074585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.600150227546692, "epoch": 2.07, "learning_rate": 3.519502475546432e-05, "loss": 1.3536, "step": 2452, "task_loss": 1.9611575603485107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.6851948499679565, "epoch": 2.07, "learning_rate": 3.518898683733849e-05, "loss": 1.6067, "step": 2453, "task_loss": 1.4527864456176758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.6482417583465576, "epoch": 2.07, "learning_rate": 3.518294891921265e-05, "loss": 1.4196, "step": 2454, "task_loss": 0.9126148819923401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1837124824523926, "epoch": 2.08, "learning_rate": 3.517691100108683e-05, "loss": 1.482, "step": 2455, "task_loss": 1.0458905696868896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.624044418334961, "epoch": 2.08, "learning_rate": 3.5170873082961e-05, "loss": 1.3499, "step": 2456, "task_loss": 1.2754698991775513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.7467665672302246, "epoch": 2.08, "learning_rate": 3.516483516483517e-05, "loss": 1.2571, "step": 2457, "task_loss": 1.8186559677124023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.71449875831604, "epoch": 2.08, "learning_rate": 3.5158797246709335e-05, "loss": 1.2227, "step": 2458, "task_loss": 1.1837830543518066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2735698223114014, "epoch": 2.08, "learning_rate": 3.515275932858351e-05, "loss": 1.2175, "step": 2459, "task_loss": 1.2002431154251099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.405210256576538, "epoch": 2.08, "learning_rate": 3.5146721410457676e-05, "loss": 1.5672, "step": 2460, "task_loss": 1.0881227254867554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3665781021118164, "epoch": 2.08, "learning_rate": 3.514068349233184e-05, "loss": 1.3352, "step": 2461, "task_loss": 1.4953579902648926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.41208016872406, "epoch": 2.08, "learning_rate": 3.513464557420602e-05, "loss": 1.3782, "step": 2462, "task_loss": 0.352135568857193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1963071823120117, "epoch": 2.08, "learning_rate": 3.5128607656080184e-05, "loss": 1.8499, "step": 2463, "task_loss": 0.4583376348018646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.74224853515625, "epoch": 2.08, "learning_rate": 3.512256973795435e-05, "loss": 1.3288, "step": 2464, "task_loss": 1.064825415611267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2254512310028076, "epoch": 2.08, "learning_rate": 3.5116531819828526e-05, "loss": 1.4212, "step": 2465, "task_loss": 1.3030978441238403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.5794579982757568, "epoch": 2.08, "learning_rate": 3.511049390170269e-05, "loss": 1.3463, "step": 2466, "task_loss": 1.7503514289855957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1074600219726562, "epoch": 2.09, "learning_rate": 3.510445598357686e-05, "loss": 1.2721, "step": 2467, "task_loss": 0.8048511743545532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.6260517835617065, "epoch": 2.09, "learning_rate": 3.5098418065451034e-05, "loss": 1.2349, "step": 2468, "task_loss": 1.097236156463623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2198338508605957, "epoch": 2.09, "learning_rate": 3.509238014732521e-05, "loss": 1.2554, "step": 2469, "task_loss": 1.596426248550415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3188869953155518, "epoch": 2.09, "learning_rate": 3.5086342229199375e-05, "loss": 1.1693, "step": 2470, "task_loss": 1.3935199975967407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9356237649917603, "epoch": 2.09, "learning_rate": 3.508030431107354e-05, "loss": 1.138, "step": 2471, "task_loss": 0.9571179151535034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9884477257728577, "epoch": 2.09, "learning_rate": 3.5074266392947716e-05, "loss": 1.3923, "step": 2472, "task_loss": 1.318662166595459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4606609344482422, "epoch": 2.09, "learning_rate": 3.506822847482188e-05, "loss": 1.4305, "step": 2473, "task_loss": 0.7601984143257141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.185983419418335, "epoch": 2.09, "learning_rate": 3.506219055669605e-05, "loss": 1.6359, "step": 2474, "task_loss": 1.1885576248168945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4593037366867065, "epoch": 2.09, "learning_rate": 3.5056152638570225e-05, "loss": 1.5689, "step": 2475, "task_loss": 0.5814553499221802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3374162912368774, "epoch": 2.09, "learning_rate": 3.505011472044439e-05, "loss": 1.1298, "step": 2476, "task_loss": 0.6473168730735779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.2450132369995117, "epoch": 2.09, "learning_rate": 3.504407680231856e-05, "loss": 1.2661, "step": 2477, "task_loss": 2.231135368347168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.686068058013916, "epoch": 2.09, "learning_rate": 3.503803888419273e-05, "loss": 1.386, "step": 2478, "task_loss": 1.2415560483932495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.7942306995391846, "epoch": 2.1, "learning_rate": 3.50320009660669e-05, "loss": 1.2721, "step": 2479, "task_loss": 0.7026255130767822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4284751415252686, "epoch": 2.1, "learning_rate": 3.5025963047941074e-05, "loss": 1.4027, "step": 2480, "task_loss": 0.561096727848053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1545677185058594, "epoch": 2.1, "learning_rate": 3.501992512981524e-05, "loss": 1.3513, "step": 2481, "task_loss": 1.1893587112426758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0366015434265137, "epoch": 2.1, "learning_rate": 3.501388721168941e-05, "loss": 1.3185, "step": 2482, "task_loss": 0.5875089764595032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0594216585159302, "epoch": 2.1, "learning_rate": 3.500784929356358e-05, "loss": 1.0303, "step": 2483, "task_loss": 0.5982950329780579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2622240781784058, "epoch": 2.1, "learning_rate": 3.500181137543775e-05, "loss": 1.1256, "step": 2484, "task_loss": 1.3893510103225708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8780015707015991, "epoch": 2.1, "learning_rate": 3.499577345731192e-05, "loss": 1.3606, "step": 2485, "task_loss": 0.7664692401885986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5420197248458862, "epoch": 2.1, "learning_rate": 3.498973553918609e-05, "loss": 1.1911, "step": 2486, "task_loss": 1.314092993736267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.0764689445495605, "epoch": 2.1, "learning_rate": 3.498369762106026e-05, "loss": 1.3682, "step": 2487, "task_loss": 2.0281178951263428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 3.4595751762390137, "epoch": 2.1, "learning_rate": 3.497765970293443e-05, "loss": 1.9426, "step": 2488, "task_loss": 3.259431838989258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.6280409097671509, "epoch": 2.1, "learning_rate": 3.49716217848086e-05, "loss": 1.2463, "step": 2489, "task_loss": 2.0687222480773926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4340925216674805, "epoch": 2.1, "learning_rate": 3.496558386668277e-05, "loss": 1.1063, "step": 2490, "task_loss": 0.8355008363723755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.973612368106842, "epoch": 2.11, "learning_rate": 3.495954594855694e-05, "loss": 1.278, "step": 2491, "task_loss": 1.3828065395355225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3168693780899048, "epoch": 2.11, "learning_rate": 3.495350803043111e-05, "loss": 1.0123, "step": 2492, "task_loss": 1.557356834411621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2896207571029663, "epoch": 2.11, "learning_rate": 3.494747011230528e-05, "loss": 1.505, "step": 2493, "task_loss": 1.459700107574463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.7636563777923584, "epoch": 2.11, "learning_rate": 3.494143219417945e-05, "loss": 1.2366, "step": 2494, "task_loss": 1.208235263824463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2116196155548096, "epoch": 2.11, "learning_rate": 3.4935394276053616e-05, "loss": 1.2082, "step": 2495, "task_loss": 0.7974462509155273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9458285570144653, "epoch": 2.11, "learning_rate": 3.492935635792779e-05, "loss": 1.2422, "step": 2496, "task_loss": 0.4279972016811371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.737341284751892, "epoch": 2.11, "learning_rate": 3.492331843980196e-05, "loss": 1.227, "step": 2497, "task_loss": 1.66383957862854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2319836616516113, "epoch": 2.11, "learning_rate": 3.4917280521676124e-05, "loss": 1.1503, "step": 2498, "task_loss": 2.216526985168457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.143429756164551, "epoch": 2.11, "learning_rate": 3.49112426035503e-05, "loss": 1.2892, "step": 2499, "task_loss": 1.2699668407440186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.309948444366455, "epoch": 2.11, "learning_rate": 3.490520468542447e-05, "loss": 1.2272, "step": 2500, "task_loss": 0.486138254404068 }, { "epoch": 2.11, "eval_accuracy": 0.8563564356435643, "eval_loss": 0.7333118319511414, "eval_runtime": 229.2574, "eval_samples_per_second": 110.138, "eval_steps_per_second": 0.864, "step": 2500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7053568363189697, "epoch": 2.11, "learning_rate": 3.489916676729863e-05, "loss": 0.7985, "step": 2501, "task_loss": 1.072098970413208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.6546218395233154, "epoch": 2.11, "learning_rate": 3.4893128849172806e-05, "loss": 1.3306, "step": 2502, "task_loss": 1.0386290550231934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9856660962104797, "epoch": 2.12, "learning_rate": 3.488709093104698e-05, "loss": 1.1253, "step": 2503, "task_loss": 0.730956494808197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4524785280227661, "epoch": 2.12, "learning_rate": 3.488105301292115e-05, "loss": 1.2871, "step": 2504, "task_loss": 0.6943610906600952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3152192831039429, "epoch": 2.12, "learning_rate": 3.4875015094795315e-05, "loss": 1.225, "step": 2505, "task_loss": 1.2128413915634155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3625967502593994, "epoch": 2.12, "learning_rate": 3.486897717666949e-05, "loss": 1.35, "step": 2506, "task_loss": 1.2023200988769531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.6762778759002686, "epoch": 2.12, "learning_rate": 3.4862939258543656e-05, "loss": 1.2491, "step": 2507, "task_loss": 0.7340527772903442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0951451063156128, "epoch": 2.12, "learning_rate": 3.485690134041782e-05, "loss": 1.1814, "step": 2508, "task_loss": 1.0627542734146118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3382368087768555, "epoch": 2.12, "learning_rate": 3.4850863422292e-05, "loss": 1.4205, "step": 2509, "task_loss": 1.3101381063461304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.234015703201294, "epoch": 2.12, "learning_rate": 3.484482550416617e-05, "loss": 1.0078, "step": 2510, "task_loss": 0.5215551853179932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4782711267471313, "epoch": 2.12, "learning_rate": 3.483878758604033e-05, "loss": 1.5511, "step": 2511, "task_loss": 1.44635009765625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8828892111778259, "epoch": 2.12, "learning_rate": 3.4832749667914505e-05, "loss": 1.454, "step": 2512, "task_loss": 1.097953200340271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.011842966079712, "epoch": 2.12, "learning_rate": 3.482671174978868e-05, "loss": 0.9572, "step": 2513, "task_loss": 0.5319902300834656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.5200048685073853, "epoch": 2.13, "learning_rate": 3.482067383166284e-05, "loss": 1.3172, "step": 2514, "task_loss": 0.8237355351448059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.5037763118743896, "epoch": 2.13, "learning_rate": 3.4814635913537013e-05, "loss": 1.5082, "step": 2515, "task_loss": 1.2203800678253174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3670780658721924, "epoch": 2.13, "learning_rate": 3.480859799541119e-05, "loss": 1.0011, "step": 2516, "task_loss": 1.9772387742996216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1681078672409058, "epoch": 2.13, "learning_rate": 3.480256007728535e-05, "loss": 0.8712, "step": 2517, "task_loss": 1.332604169845581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0971417427062988, "epoch": 2.13, "learning_rate": 3.479652215915952e-05, "loss": 1.1381, "step": 2518, "task_loss": 0.4346051812171936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.5040347576141357, "epoch": 2.13, "learning_rate": 3.4790484241033696e-05, "loss": 1.1683, "step": 2519, "task_loss": 1.064577579498291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3124432563781738, "epoch": 2.13, "learning_rate": 3.478444632290786e-05, "loss": 1.2684, "step": 2520, "task_loss": 0.8021447062492371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3003246784210205, "epoch": 2.13, "learning_rate": 3.477840840478203e-05, "loss": 1.3377, "step": 2521, "task_loss": 1.6360363960266113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9612334966659546, "epoch": 2.13, "learning_rate": 3.4772370486656204e-05, "loss": 1.1334, "step": 2522, "task_loss": 1.5336648225784302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.872597336769104, "epoch": 2.13, "learning_rate": 3.476633256853037e-05, "loss": 1.0612, "step": 2523, "task_loss": 0.4167312681674957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0671679973602295, "epoch": 2.13, "learning_rate": 3.476029465040454e-05, "loss": 1.1471, "step": 2524, "task_loss": 0.9894512891769409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.099616050720215, "epoch": 2.13, "learning_rate": 3.475425673227871e-05, "loss": 1.2886, "step": 2525, "task_loss": 1.6126190423965454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.053142786026001, "epoch": 2.14, "learning_rate": 3.4748218814152886e-05, "loss": 1.1567, "step": 2526, "task_loss": 0.8060246109962463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0318787097930908, "epoch": 2.14, "learning_rate": 3.474218089602705e-05, "loss": 1.1091, "step": 2527, "task_loss": 0.7580288648605347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3646152019500732, "epoch": 2.14, "learning_rate": 3.473614297790122e-05, "loss": 1.186, "step": 2528, "task_loss": 1.4335401058197021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4853459596633911, "epoch": 2.14, "learning_rate": 3.4730105059775395e-05, "loss": 1.1266, "step": 2529, "task_loss": 1.4229718446731567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1696252822875977, "epoch": 2.14, "learning_rate": 3.472406714164956e-05, "loss": 1.2161, "step": 2530, "task_loss": 1.6826777458190918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.8503031730651855, "epoch": 2.14, "learning_rate": 3.471802922352373e-05, "loss": 1.4453, "step": 2531, "task_loss": 2.1888749599456787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.9229843616485596, "epoch": 2.14, "learning_rate": 3.47119913053979e-05, "loss": 1.1687, "step": 2532, "task_loss": 1.2211486101150513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8921827077865601, "epoch": 2.14, "learning_rate": 3.470595338727207e-05, "loss": 1.0006, "step": 2533, "task_loss": 2.0542569160461426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1033704280853271, "epoch": 2.14, "learning_rate": 3.469991546914624e-05, "loss": 1.194, "step": 2534, "task_loss": 1.0243548154830933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.020796298980713, "epoch": 2.14, "learning_rate": 3.469387755102041e-05, "loss": 1.2704, "step": 2535, "task_loss": 1.225877046585083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.7907130718231201, "epoch": 2.14, "learning_rate": 3.468783963289458e-05, "loss": 1.2552, "step": 2536, "task_loss": 1.569359302520752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2219887971878052, "epoch": 2.14, "learning_rate": 3.4681801714768746e-05, "loss": 1.1189, "step": 2537, "task_loss": 1.163564682006836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8630379438400269, "epoch": 2.15, "learning_rate": 3.467576379664292e-05, "loss": 1.1174, "step": 2538, "task_loss": 1.1075502634048462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.072967529296875, "epoch": 2.15, "learning_rate": 3.466972587851709e-05, "loss": 1.1927, "step": 2539, "task_loss": 0.6020919680595398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9529368877410889, "epoch": 2.15, "learning_rate": 3.466368796039126e-05, "loss": 1.0605, "step": 2540, "task_loss": 0.6872215867042542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.5466176271438599, "epoch": 2.15, "learning_rate": 3.465765004226543e-05, "loss": 1.3216, "step": 2541, "task_loss": 1.284846544265747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0043617486953735, "epoch": 2.15, "learning_rate": 3.4651612124139595e-05, "loss": 1.0973, "step": 2542, "task_loss": 1.4613919258117676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8847873210906982, "epoch": 2.15, "learning_rate": 3.464557420601377e-05, "loss": 1.2071, "step": 2543, "task_loss": 1.0765708684921265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9613034129142761, "epoch": 2.15, "learning_rate": 3.4639536287887936e-05, "loss": 1.1469, "step": 2544, "task_loss": 0.5484113693237305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.166001319885254, "epoch": 2.15, "learning_rate": 3.463349836976211e-05, "loss": 1.1228, "step": 2545, "task_loss": 1.4188872575759888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3129665851593018, "epoch": 2.15, "learning_rate": 3.462746045163628e-05, "loss": 1.1087, "step": 2546, "task_loss": 1.3805090188980103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1447150707244873, "epoch": 2.15, "learning_rate": 3.4621422533510445e-05, "loss": 1.0386, "step": 2547, "task_loss": 1.035521388053894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6487390995025635, "epoch": 2.15, "learning_rate": 3.461538461538462e-05, "loss": 0.8599, "step": 2548, "task_loss": 0.1829056739807129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3855171203613281, "epoch": 2.15, "learning_rate": 3.4609346697258786e-05, "loss": 1.0213, "step": 2549, "task_loss": 1.4720946550369263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1885156631469727, "epoch": 2.16, "learning_rate": 3.460330877913296e-05, "loss": 1.2611, "step": 2550, "task_loss": 1.2164133787155151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9729989171028137, "epoch": 2.16, "learning_rate": 3.459727086100713e-05, "loss": 1.0546, "step": 2551, "task_loss": 0.7820262312889099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.442335605621338, "epoch": 2.16, "learning_rate": 3.4591232942881294e-05, "loss": 1.2985, "step": 2552, "task_loss": 1.735497236251831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9396524429321289, "epoch": 2.16, "learning_rate": 3.458519502475547e-05, "loss": 0.7632, "step": 2553, "task_loss": 1.0489277839660645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2471200227737427, "epoch": 2.16, "learning_rate": 3.4579157106629635e-05, "loss": 0.9683, "step": 2554, "task_loss": 0.4694829285144806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0390191078186035, "epoch": 2.16, "learning_rate": 3.45731191885038e-05, "loss": 1.1179, "step": 2555, "task_loss": 1.3423452377319336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1110248565673828, "epoch": 2.16, "learning_rate": 3.4567081270377976e-05, "loss": 1.1611, "step": 2556, "task_loss": 1.9497874975204468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9859863519668579, "epoch": 2.16, "learning_rate": 3.4561043352252144e-05, "loss": 1.1326, "step": 2557, "task_loss": 1.3359191417694092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.510878086090088, "epoch": 2.16, "learning_rate": 3.455500543412631e-05, "loss": 0.97, "step": 2558, "task_loss": 0.9846051931381226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.6238220930099487, "epoch": 2.16, "learning_rate": 3.4548967516000485e-05, "loss": 1.2379, "step": 2559, "task_loss": 2.1146891117095947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2534136772155762, "epoch": 2.16, "learning_rate": 3.454292959787466e-05, "loss": 1.3739, "step": 2560, "task_loss": 0.6322780251502991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3101569414138794, "epoch": 2.16, "learning_rate": 3.4536891679748826e-05, "loss": 0.7611, "step": 2561, "task_loss": 0.6386892795562744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.358349084854126, "epoch": 2.17, "learning_rate": 3.453085376162299e-05, "loss": 1.4254, "step": 2562, "task_loss": 1.1581001281738281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4563337564468384, "epoch": 2.17, "learning_rate": 3.452481584349717e-05, "loss": 1.005, "step": 2563, "task_loss": 0.8354026675224304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9888055324554443, "epoch": 2.17, "learning_rate": 3.4518777925371334e-05, "loss": 1.0262, "step": 2564, "task_loss": 1.3684767484664917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8452444076538086, "epoch": 2.17, "learning_rate": 3.45127400072455e-05, "loss": 1.009, "step": 2565, "task_loss": 0.41719377040863037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.734812617301941, "epoch": 2.17, "learning_rate": 3.4506702089119675e-05, "loss": 1.1615, "step": 2566, "task_loss": 1.3667136430740356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8321875333786011, "epoch": 2.17, "learning_rate": 3.450066417099384e-05, "loss": 1.1637, "step": 2567, "task_loss": 0.6410722136497498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.958520770072937, "epoch": 2.17, "learning_rate": 3.449462625286801e-05, "loss": 0.8706, "step": 2568, "task_loss": 0.36951157450675964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7271863222122192, "epoch": 2.17, "learning_rate": 3.4488588334742184e-05, "loss": 1.0141, "step": 2569, "task_loss": 0.6958513259887695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6534376740455627, "epoch": 2.17, "learning_rate": 3.448255041661636e-05, "loss": 0.8756, "step": 2570, "task_loss": 0.19065265357494354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.104745864868164, "epoch": 2.17, "learning_rate": 3.447651249849052e-05, "loss": 1.1397, "step": 2571, "task_loss": 1.342674970626831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2388222217559814, "epoch": 2.17, "learning_rate": 3.447047458036469e-05, "loss": 0.8099, "step": 2572, "task_loss": 1.2349869012832642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7347530722618103, "epoch": 2.17, "learning_rate": 3.4464436662238866e-05, "loss": 0.8695, "step": 2573, "task_loss": 0.7151706218719482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0751478672027588, "epoch": 2.18, "learning_rate": 3.4458398744113026e-05, "loss": 1.1783, "step": 2574, "task_loss": 1.4406627416610718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1418249607086182, "epoch": 2.18, "learning_rate": 3.44523608259872e-05, "loss": 1.0347, "step": 2575, "task_loss": 0.8527354598045349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1142523288726807, "epoch": 2.18, "learning_rate": 3.4446322907861374e-05, "loss": 1.11, "step": 2576, "task_loss": 1.6877604722976685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.708667516708374, "epoch": 2.18, "learning_rate": 3.444028498973554e-05, "loss": 1.0734, "step": 2577, "task_loss": 0.6796867847442627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8069379329681396, "epoch": 2.18, "learning_rate": 3.443424707160971e-05, "loss": 0.6966, "step": 2578, "task_loss": 1.3817098140716553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1646077632904053, "epoch": 2.18, "learning_rate": 3.442820915348388e-05, "loss": 1.2992, "step": 2579, "task_loss": 1.5151630640029907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.448328971862793, "epoch": 2.18, "learning_rate": 3.442217123535805e-05, "loss": 1.2454, "step": 2580, "task_loss": 0.5856110453605652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8423644304275513, "epoch": 2.18, "learning_rate": 3.441613331723222e-05, "loss": 1.0249, "step": 2581, "task_loss": 0.3457372784614563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0610859394073486, "epoch": 2.18, "learning_rate": 3.441009539910639e-05, "loss": 1.0793, "step": 2582, "task_loss": 0.475426584482193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.89096599817276, "epoch": 2.18, "learning_rate": 3.4404057480980565e-05, "loss": 0.8534, "step": 2583, "task_loss": 1.1937633752822876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.086619257926941, "epoch": 2.18, "learning_rate": 3.4398019562854725e-05, "loss": 1.0219, "step": 2584, "task_loss": 1.3101277351379395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0598491430282593, "epoch": 2.19, "learning_rate": 3.43919816447289e-05, "loss": 0.9312, "step": 2585, "task_loss": 0.2732967138290405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6186603903770447, "epoch": 2.19, "learning_rate": 3.438594372660307e-05, "loss": 0.7945, "step": 2586, "task_loss": 0.7435577511787415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8667780160903931, "epoch": 2.19, "learning_rate": 3.4379905808477234e-05, "loss": 0.8379, "step": 2587, "task_loss": 0.9519128799438477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8923795223236084, "epoch": 2.19, "learning_rate": 3.437386789035141e-05, "loss": 0.8693, "step": 2588, "task_loss": 0.9834550619125366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9654223322868347, "epoch": 2.19, "learning_rate": 3.436782997222558e-05, "loss": 1.0249, "step": 2589, "task_loss": 2.0153212547302246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.295036792755127, "epoch": 2.19, "learning_rate": 3.436179205409975e-05, "loss": 1.467, "step": 2590, "task_loss": 1.553170919418335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1218854188919067, "epoch": 2.19, "learning_rate": 3.4355754135973916e-05, "loss": 0.8051, "step": 2591, "task_loss": 1.0275639295578003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1406588554382324, "epoch": 2.19, "learning_rate": 3.434971621784809e-05, "loss": 0.9485, "step": 2592, "task_loss": 1.1052346229553223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.987655758857727, "epoch": 2.19, "learning_rate": 3.434367829972226e-05, "loss": 0.7353, "step": 2593, "task_loss": 0.5401403903961182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5932595729827881, "epoch": 2.19, "learning_rate": 3.4337640381596424e-05, "loss": 1.0469, "step": 2594, "task_loss": 1.0760095119476318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.079272747039795, "epoch": 2.19, "learning_rate": 3.43316024634706e-05, "loss": 0.952, "step": 2595, "task_loss": 0.9535637497901917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.7089083194732666, "epoch": 2.19, "learning_rate": 3.4325564545344765e-05, "loss": 1.0304, "step": 2596, "task_loss": 1.0816075801849365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9977084398269653, "epoch": 2.2, "learning_rate": 3.431952662721893e-05, "loss": 1.1712, "step": 2597, "task_loss": 0.5775997042655945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.6227670907974243, "epoch": 2.2, "learning_rate": 3.4313488709093107e-05, "loss": 1.2516, "step": 2598, "task_loss": 1.1307307481765747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4746872186660767, "epoch": 2.2, "learning_rate": 3.430745079096728e-05, "loss": 1.066, "step": 2599, "task_loss": 2.412060260772705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8571417331695557, "epoch": 2.2, "learning_rate": 3.430141287284145e-05, "loss": 0.7375, "step": 2600, "task_loss": 1.036384105682373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8657087683677673, "epoch": 2.2, "learning_rate": 3.4295374954715615e-05, "loss": 0.9436, "step": 2601, "task_loss": 0.8357915878295898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2171831130981445, "epoch": 2.2, "learning_rate": 3.428933703658979e-05, "loss": 1.0872, "step": 2602, "task_loss": 0.8800215125083923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0127038955688477, "epoch": 2.2, "learning_rate": 3.4283299118463956e-05, "loss": 1.0037, "step": 2603, "task_loss": 1.4417030811309814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7587880492210388, "epoch": 2.2, "learning_rate": 3.427726120033812e-05, "loss": 1.14, "step": 2604, "task_loss": 1.0590839385986328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 2.2289013862609863, "epoch": 2.2, "learning_rate": 3.42712232822123e-05, "loss": 1.1523, "step": 2605, "task_loss": 1.0584945678710938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7854428291320801, "epoch": 2.2, "learning_rate": 3.4265185364086464e-05, "loss": 0.8291, "step": 2606, "task_loss": 0.9879540801048279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2266194820404053, "epoch": 2.2, "learning_rate": 3.425914744596063e-05, "loss": 0.9044, "step": 2607, "task_loss": 1.2221444845199585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3500730991363525, "epoch": 2.2, "learning_rate": 3.4253109527834805e-05, "loss": 1.1623, "step": 2608, "task_loss": 1.3208087682724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1848549842834473, "epoch": 2.21, "learning_rate": 3.424707160970897e-05, "loss": 1.2707, "step": 2609, "task_loss": 0.9342983961105347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.279867172241211, "epoch": 2.21, "learning_rate": 3.4241033691583147e-05, "loss": 0.6991, "step": 2610, "task_loss": 0.3087937533855438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.947964072227478, "epoch": 2.21, "learning_rate": 3.4234995773457314e-05, "loss": 1.1283, "step": 2611, "task_loss": 2.706888437271118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0098567008972168, "epoch": 2.21, "learning_rate": 3.422895785533148e-05, "loss": 1.3186, "step": 2612, "task_loss": 0.917197048664093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.372505784034729, "epoch": 2.21, "learning_rate": 3.4222919937205655e-05, "loss": 1.371, "step": 2613, "task_loss": 0.5588114857673645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5519673824310303, "epoch": 2.21, "learning_rate": 3.421688201907982e-05, "loss": 1.155, "step": 2614, "task_loss": 0.17837344110012054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8124719858169556, "epoch": 2.21, "learning_rate": 3.421084410095399e-05, "loss": 0.9052, "step": 2615, "task_loss": 1.274876594543457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9510629177093506, "epoch": 2.21, "learning_rate": 3.420480618282816e-05, "loss": 1.2591, "step": 2616, "task_loss": 1.456710696220398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7944259643554688, "epoch": 2.21, "learning_rate": 3.419876826470233e-05, "loss": 1.028, "step": 2617, "task_loss": 1.876255989074707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1987496614456177, "epoch": 2.21, "learning_rate": 3.4192730346576504e-05, "loss": 1.1439, "step": 2618, "task_loss": 0.5110378861427307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1718367338180542, "epoch": 2.21, "learning_rate": 3.418669242845067e-05, "loss": 0.9125, "step": 2619, "task_loss": 1.310496211051941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9535084366798401, "epoch": 2.21, "learning_rate": 3.4180654510324846e-05, "loss": 1.0658, "step": 2620, "task_loss": 1.4184926748275757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9133282899856567, "epoch": 2.22, "learning_rate": 3.417461659219901e-05, "loss": 0.9009, "step": 2621, "task_loss": 0.5645947456359863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8920131921768188, "epoch": 2.22, "learning_rate": 3.416857867407318e-05, "loss": 0.8062, "step": 2622, "task_loss": 1.406044363975525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46550729870796204, "epoch": 2.22, "learning_rate": 3.4162540755947354e-05, "loss": 1.1189, "step": 2623, "task_loss": 0.4624236524105072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0660269260406494, "epoch": 2.22, "learning_rate": 3.415650283782152e-05, "loss": 1.3048, "step": 2624, "task_loss": 0.7277117371559143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4068208932876587, "epoch": 2.22, "learning_rate": 3.415046491969569e-05, "loss": 1.4128, "step": 2625, "task_loss": 0.9926701188087463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7383972406387329, "epoch": 2.22, "learning_rate": 3.414442700156986e-05, "loss": 1.1177, "step": 2626, "task_loss": 0.957274317741394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9114563465118408, "epoch": 2.22, "learning_rate": 3.413838908344403e-05, "loss": 0.9853, "step": 2627, "task_loss": 0.6193715333938599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1046632528305054, "epoch": 2.22, "learning_rate": 3.4132351165318197e-05, "loss": 0.8785, "step": 2628, "task_loss": 1.4196383953094482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8527463674545288, "epoch": 2.22, "learning_rate": 3.412631324719237e-05, "loss": 0.7664, "step": 2629, "task_loss": 0.8996005058288574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7241092324256897, "epoch": 2.22, "learning_rate": 3.4120275329066544e-05, "loss": 0.7781, "step": 2630, "task_loss": 0.35440385341644287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0816233158111572, "epoch": 2.22, "learning_rate": 3.4114237410940705e-05, "loss": 0.8801, "step": 2631, "task_loss": 0.6183938384056091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0265636444091797, "epoch": 2.22, "learning_rate": 3.410819949281488e-05, "loss": 1.0162, "step": 2632, "task_loss": 0.30466100573539734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.7011849880218506, "epoch": 2.23, "learning_rate": 3.410216157468905e-05, "loss": 1.0393, "step": 2633, "task_loss": 0.8757601380348206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9026620388031006, "epoch": 2.23, "learning_rate": 3.409612365656322e-05, "loss": 0.9853, "step": 2634, "task_loss": 0.903777539730072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0438021421432495, "epoch": 2.23, "learning_rate": 3.409008573843739e-05, "loss": 1.002, "step": 2635, "task_loss": 0.6546807885169983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28651177883148193, "epoch": 2.23, "learning_rate": 3.408404782031156e-05, "loss": 0.7028, "step": 2636, "task_loss": 0.10968948900699615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8486788272857666, "epoch": 2.23, "learning_rate": 3.407800990218573e-05, "loss": 0.9936, "step": 2637, "task_loss": 0.23335209488868713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44586181640625, "epoch": 2.23, "learning_rate": 3.4071971984059895e-05, "loss": 0.8871, "step": 2638, "task_loss": 0.10956268757581711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.157613754272461, "epoch": 2.23, "learning_rate": 3.406593406593407e-05, "loss": 1.0427, "step": 2639, "task_loss": 0.6963222622871399 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.972432017326355, "epoch": 2.23, "learning_rate": 3.405989614780824e-05, "loss": 0.9158, "step": 2640, "task_loss": 1.4774059057235718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.70735764503479, "epoch": 2.23, "learning_rate": 3.4053858229682404e-05, "loss": 1.0131, "step": 2641, "task_loss": 2.0583670139312744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5798364877700806, "epoch": 2.23, "learning_rate": 3.404782031155658e-05, "loss": 0.997, "step": 2642, "task_loss": 1.302799105644226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9422860741615295, "epoch": 2.23, "learning_rate": 3.404178239343075e-05, "loss": 1.043, "step": 2643, "task_loss": 1.3856111764907837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6854699850082397, "epoch": 2.23, "learning_rate": 3.403574447530491e-05, "loss": 1.0496, "step": 2644, "task_loss": 0.6804392337799072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8499003648757935, "epoch": 2.24, "learning_rate": 3.4029706557179086e-05, "loss": 1.0954, "step": 2645, "task_loss": 1.5785785913467407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7805129885673523, "epoch": 2.24, "learning_rate": 3.402366863905326e-05, "loss": 1.1403, "step": 2646, "task_loss": 0.5567512512207031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8451440334320068, "epoch": 2.24, "learning_rate": 3.401763072092742e-05, "loss": 0.9345, "step": 2647, "task_loss": 0.22002291679382324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7402001619338989, "epoch": 2.24, "learning_rate": 3.4011592802801594e-05, "loss": 0.713, "step": 2648, "task_loss": 0.6807963848114014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8235834240913391, "epoch": 2.24, "learning_rate": 3.400555488467577e-05, "loss": 0.8971, "step": 2649, "task_loss": 0.7971684336662292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3212652206420898, "epoch": 2.24, "learning_rate": 3.3999516966549936e-05, "loss": 0.9333, "step": 2650, "task_loss": 1.540669560432434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8050234913825989, "epoch": 2.24, "learning_rate": 3.39934790484241e-05, "loss": 1.0631, "step": 2651, "task_loss": 0.7079522013664246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8887056708335876, "epoch": 2.24, "learning_rate": 3.398744113029828e-05, "loss": 0.9981, "step": 2652, "task_loss": 0.6643526554107666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4707534909248352, "epoch": 2.24, "learning_rate": 3.3981403212172444e-05, "loss": 0.7686, "step": 2653, "task_loss": 1.320693850517273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7952332496643066, "epoch": 2.24, "learning_rate": 3.397536529404661e-05, "loss": 0.8382, "step": 2654, "task_loss": 1.5837558507919312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7599750757217407, "epoch": 2.24, "learning_rate": 3.3969327375920785e-05, "loss": 0.8924, "step": 2655, "task_loss": 0.8242706060409546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9235665798187256, "epoch": 2.24, "learning_rate": 3.396328945779496e-05, "loss": 1.1686, "step": 2656, "task_loss": 1.292905330657959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9184669852256775, "epoch": 2.25, "learning_rate": 3.395725153966912e-05, "loss": 1.2078, "step": 2657, "task_loss": 1.4248089790344238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9695634245872498, "epoch": 2.25, "learning_rate": 3.395121362154329e-05, "loss": 1.0313, "step": 2658, "task_loss": 2.0371057987213135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.575175166130066, "epoch": 2.25, "learning_rate": 3.394517570341747e-05, "loss": 1.1826, "step": 2659, "task_loss": 1.4106309413909912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1319276094436646, "epoch": 2.25, "learning_rate": 3.393913778529163e-05, "loss": 0.9901, "step": 2660, "task_loss": 0.6710088849067688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8029345273971558, "epoch": 2.25, "learning_rate": 3.39330998671658e-05, "loss": 0.8615, "step": 2661, "task_loss": 0.3325652480125427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2347334623336792, "epoch": 2.25, "learning_rate": 3.3927061949039976e-05, "loss": 0.9499, "step": 2662, "task_loss": 1.2764862775802612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1245617866516113, "epoch": 2.25, "learning_rate": 3.392102403091414e-05, "loss": 1.0519, "step": 2663, "task_loss": 1.120246171951294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9194535613059998, "epoch": 2.25, "learning_rate": 3.391498611278831e-05, "loss": 0.7084, "step": 2664, "task_loss": 1.0923113822937012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.976528525352478, "epoch": 2.25, "learning_rate": 3.3908948194662484e-05, "loss": 0.9105, "step": 2665, "task_loss": 0.9809689521789551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.5307762622833252, "epoch": 2.25, "learning_rate": 3.390291027653665e-05, "loss": 1.1235, "step": 2666, "task_loss": 1.7642765045166016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9152707457542419, "epoch": 2.25, "learning_rate": 3.389687235841082e-05, "loss": 1.4282, "step": 2667, "task_loss": 0.6006597876548767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.5972282886505127, "epoch": 2.26, "learning_rate": 3.389083444028499e-05, "loss": 1.1225, "step": 2668, "task_loss": 1.4307271242141724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.7676655054092407, "epoch": 2.26, "learning_rate": 3.388479652215916e-05, "loss": 1.1761, "step": 2669, "task_loss": 1.8560175895690918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5895076394081116, "epoch": 2.26, "learning_rate": 3.387875860403333e-05, "loss": 0.7095, "step": 2670, "task_loss": 0.07226578891277313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4113216400146484, "epoch": 2.26, "learning_rate": 3.38727206859075e-05, "loss": 1.0912, "step": 2671, "task_loss": 1.507526159286499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.5729827880859375, "epoch": 2.26, "learning_rate": 3.386668276778167e-05, "loss": 0.9892, "step": 2672, "task_loss": 1.5793005228042603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0332493782043457, "epoch": 2.26, "learning_rate": 3.386064484965584e-05, "loss": 0.857, "step": 2673, "task_loss": 0.732329249382019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0338621139526367, "epoch": 2.26, "learning_rate": 3.385460693153001e-05, "loss": 0.8643, "step": 2674, "task_loss": 1.4471789598464966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.191853642463684, "epoch": 2.26, "learning_rate": 3.384856901340418e-05, "loss": 0.8097, "step": 2675, "task_loss": 0.34134289622306824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8285110592842102, "epoch": 2.26, "learning_rate": 3.384253109527835e-05, "loss": 1.0557, "step": 2676, "task_loss": 0.5440914034843445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8842818140983582, "epoch": 2.26, "learning_rate": 3.383649317715252e-05, "loss": 0.9394, "step": 2677, "task_loss": 0.3601643443107605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6541275978088379, "epoch": 2.26, "learning_rate": 3.383045525902669e-05, "loss": 0.9225, "step": 2678, "task_loss": 0.4360131621360779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.724791407585144, "epoch": 2.26, "learning_rate": 3.382441734090086e-05, "loss": 0.9809, "step": 2679, "task_loss": 0.12596459686756134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.932608962059021, "epoch": 2.27, "learning_rate": 3.3818379422775026e-05, "loss": 1.0507, "step": 2680, "task_loss": 0.6689938306808472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.817481279373169, "epoch": 2.27, "learning_rate": 3.38123415046492e-05, "loss": 0.8276, "step": 2681, "task_loss": 0.7658313512802124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7116538882255554, "epoch": 2.27, "learning_rate": 3.380630358652337e-05, "loss": 1.1844, "step": 2682, "task_loss": 0.3444148898124695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6643540859222412, "epoch": 2.27, "learning_rate": 3.380026566839754e-05, "loss": 0.8891, "step": 2683, "task_loss": 0.4557141065597534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.5177115201950073, "epoch": 2.27, "learning_rate": 3.379422775027171e-05, "loss": 0.9998, "step": 2684, "task_loss": 0.6134933829307556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6955198049545288, "epoch": 2.27, "learning_rate": 3.3788189832145875e-05, "loss": 0.7871, "step": 2685, "task_loss": 1.3603031635284424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0159592628479004, "epoch": 2.27, "learning_rate": 3.378215191402005e-05, "loss": 0.8162, "step": 2686, "task_loss": 0.5143377780914307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2816896438598633, "epoch": 2.27, "learning_rate": 3.3776113995894216e-05, "loss": 1.0803, "step": 2687, "task_loss": 0.5969460606575012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0231655836105347, "epoch": 2.27, "learning_rate": 3.377007607776838e-05, "loss": 1.2357, "step": 2688, "task_loss": 0.5532245635986328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1717865467071533, "epoch": 2.27, "learning_rate": 3.376403815964256e-05, "loss": 0.9498, "step": 2689, "task_loss": 1.2657361030578613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0489566326141357, "epoch": 2.27, "learning_rate": 3.3758000241516725e-05, "loss": 0.9028, "step": 2690, "task_loss": 1.637095332145691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8747189044952393, "epoch": 2.27, "learning_rate": 3.37519623233909e-05, "loss": 0.8691, "step": 2691, "task_loss": 1.1510212421417236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7408261299133301, "epoch": 2.28, "learning_rate": 3.3745924405265066e-05, "loss": 0.8189, "step": 2692, "task_loss": 0.6442035436630249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6757903099060059, "epoch": 2.28, "learning_rate": 3.373988648713924e-05, "loss": 0.9574, "step": 2693, "task_loss": 0.5955031514167786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1754645109176636, "epoch": 2.28, "learning_rate": 3.373384856901341e-05, "loss": 0.9679, "step": 2694, "task_loss": 1.7746039628982544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6735185384750366, "epoch": 2.28, "learning_rate": 3.3727810650887574e-05, "loss": 0.8251, "step": 2695, "task_loss": 1.2289445400238037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.622434139251709, "epoch": 2.28, "learning_rate": 3.372177273276175e-05, "loss": 0.6994, "step": 2696, "task_loss": 1.0057252645492554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6504582166671753, "epoch": 2.28, "learning_rate": 3.3715734814635915e-05, "loss": 0.8908, "step": 2697, "task_loss": 0.4509908854961395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7204890251159668, "epoch": 2.28, "learning_rate": 3.370969689651008e-05, "loss": 0.8769, "step": 2698, "task_loss": 1.3233318328857422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.6269570589065552, "epoch": 2.28, "learning_rate": 3.3703658978384256e-05, "loss": 1.1712, "step": 2699, "task_loss": 1.956056833267212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8725727200508118, "epoch": 2.28, "learning_rate": 3.3697621060258423e-05, "loss": 0.8537, "step": 2700, "task_loss": 0.8371046781539917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5871586799621582, "epoch": 2.28, "learning_rate": 3.369158314213259e-05, "loss": 0.7538, "step": 2701, "task_loss": 1.3855600357055664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7420125603675842, "epoch": 2.28, "learning_rate": 3.3685545224006765e-05, "loss": 0.7936, "step": 2702, "task_loss": 0.22515350580215454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0388225317001343, "epoch": 2.28, "learning_rate": 3.367950730588094e-05, "loss": 0.854, "step": 2703, "task_loss": 0.8338340520858765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2776868343353271, "epoch": 2.29, "learning_rate": 3.36734693877551e-05, "loss": 1.1214, "step": 2704, "task_loss": 1.6590330600738525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0027110576629639, "epoch": 2.29, "learning_rate": 3.366743146962927e-05, "loss": 1.2852, "step": 2705, "task_loss": 1.6982241868972778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6925567388534546, "epoch": 2.29, "learning_rate": 3.366139355150345e-05, "loss": 0.9791, "step": 2706, "task_loss": 1.2016539573669434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9200738668441772, "epoch": 2.29, "learning_rate": 3.3655355633377614e-05, "loss": 1.022, "step": 2707, "task_loss": 0.621199905872345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8233129978179932, "epoch": 2.29, "learning_rate": 3.364931771525178e-05, "loss": 0.7675, "step": 2708, "task_loss": 1.717698574066162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.7800097465515137, "epoch": 2.29, "learning_rate": 3.3643279797125955e-05, "loss": 1.0293, "step": 2709, "task_loss": 1.6631591320037842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.835180401802063, "epoch": 2.29, "learning_rate": 3.363724187900012e-05, "loss": 0.8136, "step": 2710, "task_loss": 0.8154518604278564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4944051504135132, "epoch": 2.29, "learning_rate": 3.363120396087429e-05, "loss": 1.2864, "step": 2711, "task_loss": 0.22400406002998352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6651196479797363, "epoch": 2.29, "learning_rate": 3.3625166042748464e-05, "loss": 0.8696, "step": 2712, "task_loss": 0.876169741153717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0212856531143188, "epoch": 2.29, "learning_rate": 3.361912812462264e-05, "loss": 0.861, "step": 2713, "task_loss": 0.5062984824180603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8903442621231079, "epoch": 2.29, "learning_rate": 3.36130902064968e-05, "loss": 1.0297, "step": 2714, "task_loss": 0.9009994864463806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.918868899345398, "epoch": 2.29, "learning_rate": 3.360705228837097e-05, "loss": 1.0413, "step": 2715, "task_loss": 1.5730019807815552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.239820122718811, "epoch": 2.3, "learning_rate": 3.3601014370245146e-05, "loss": 1.0922, "step": 2716, "task_loss": 2.085752010345459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4795341491699219, "epoch": 2.3, "learning_rate": 3.3594976452119306e-05, "loss": 0.6926, "step": 2717, "task_loss": 0.9183290004730225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8154768943786621, "epoch": 2.3, "learning_rate": 3.358893853399348e-05, "loss": 0.7314, "step": 2718, "task_loss": 1.0868339538574219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8981808423995972, "epoch": 2.3, "learning_rate": 3.3582900615867654e-05, "loss": 1.0231, "step": 2719, "task_loss": 1.1124112606048584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8533185124397278, "epoch": 2.3, "learning_rate": 3.3576862697741815e-05, "loss": 0.9899, "step": 2720, "task_loss": 1.3460566997528076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.029684066772461, "epoch": 2.3, "learning_rate": 3.357082477961599e-05, "loss": 0.9522, "step": 2721, "task_loss": 0.6942339539527893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.175058364868164, "epoch": 2.3, "learning_rate": 3.356478686149016e-05, "loss": 0.7325, "step": 2722, "task_loss": 0.821764349937439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.682202935218811, "epoch": 2.3, "learning_rate": 3.355874894336433e-05, "loss": 1.0157, "step": 2723, "task_loss": 0.6960113644599915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0707149505615234, "epoch": 2.3, "learning_rate": 3.35527110252385e-05, "loss": 0.8266, "step": 2724, "task_loss": 0.5615648627281189 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7703637480735779, "epoch": 2.3, "learning_rate": 3.354667310711267e-05, "loss": 0.8777, "step": 2725, "task_loss": 0.538072407245636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5724782943725586, "epoch": 2.3, "learning_rate": 3.354063518898684e-05, "loss": 0.9046, "step": 2726, "task_loss": 0.3673652410507202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7646126747131348, "epoch": 2.3, "learning_rate": 3.3534597270861005e-05, "loss": 0.7657, "step": 2727, "task_loss": 0.7637287378311157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.918021023273468, "epoch": 2.31, "learning_rate": 3.352855935273518e-05, "loss": 0.9972, "step": 2728, "task_loss": 0.6936092972755432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.450455665588379, "epoch": 2.31, "learning_rate": 3.352252143460935e-05, "loss": 1.0729, "step": 2729, "task_loss": 2.500781774520874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9366022944450378, "epoch": 2.31, "learning_rate": 3.3516483516483513e-05, "loss": 0.9498, "step": 2730, "task_loss": 0.4676834046840668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2773841321468353, "epoch": 2.31, "learning_rate": 3.351044559835769e-05, "loss": 0.7392, "step": 2731, "task_loss": 0.24799323081970215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0175578594207764, "epoch": 2.31, "learning_rate": 3.350440768023186e-05, "loss": 0.962, "step": 2732, "task_loss": 0.7949071526527405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0977458953857422, "epoch": 2.31, "learning_rate": 3.349836976210603e-05, "loss": 0.9718, "step": 2733, "task_loss": 1.2721599340438843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6375280022621155, "epoch": 2.31, "learning_rate": 3.3492331843980196e-05, "loss": 0.7857, "step": 2734, "task_loss": 0.58403080701828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1336392164230347, "epoch": 2.31, "learning_rate": 3.348629392585437e-05, "loss": 1.2442, "step": 2735, "task_loss": 0.5983372330665588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6882148385047913, "epoch": 2.31, "learning_rate": 3.348025600772854e-05, "loss": 0.9241, "step": 2736, "task_loss": 0.48378580808639526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6819981336593628, "epoch": 2.31, "learning_rate": 3.3474218089602704e-05, "loss": 0.7555, "step": 2737, "task_loss": 1.1934655904769897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3990743160247803, "epoch": 2.31, "learning_rate": 3.346818017147688e-05, "loss": 1.0298, "step": 2738, "task_loss": 1.1877245903015137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5990388989448547, "epoch": 2.32, "learning_rate": 3.3462142253351045e-05, "loss": 0.7776, "step": 2739, "task_loss": 0.5888925790786743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8682611584663391, "epoch": 2.32, "learning_rate": 3.345610433522521e-05, "loss": 0.9617, "step": 2740, "task_loss": 1.3794783353805542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9116361141204834, "epoch": 2.32, "learning_rate": 3.3450066417099386e-05, "loss": 1.3678, "step": 2741, "task_loss": 1.0557504892349243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0719447135925293, "epoch": 2.32, "learning_rate": 3.3444028498973554e-05, "loss": 1.0208, "step": 2742, "task_loss": 1.0600581169128418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6759663820266724, "epoch": 2.32, "learning_rate": 3.343799058084773e-05, "loss": 0.7183, "step": 2743, "task_loss": 2.053166151046753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7869624495506287, "epoch": 2.32, "learning_rate": 3.3431952662721895e-05, "loss": 0.803, "step": 2744, "task_loss": 0.5384209156036377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2309107780456543, "epoch": 2.32, "learning_rate": 3.342591474459606e-05, "loss": 0.9157, "step": 2745, "task_loss": 0.7178307771682739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7603623867034912, "epoch": 2.32, "learning_rate": 3.3419876826470236e-05, "loss": 0.7719, "step": 2746, "task_loss": 0.5239214301109314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9308511018753052, "epoch": 2.32, "learning_rate": 3.34138389083444e-05, "loss": 1.0092, "step": 2747, "task_loss": 0.89130038022995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6566071510314941, "epoch": 2.32, "learning_rate": 3.340780099021858e-05, "loss": 0.884, "step": 2748, "task_loss": 0.9415106773376465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1403889656066895, "epoch": 2.32, "learning_rate": 3.3401763072092744e-05, "loss": 0.7545, "step": 2749, "task_loss": 1.6960960626602173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7936314940452576, "epoch": 2.32, "learning_rate": 3.339572515396691e-05, "loss": 0.8289, "step": 2750, "task_loss": 0.9571467638015747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8271783590316772, "epoch": 2.33, "learning_rate": 3.3389687235841085e-05, "loss": 0.9586, "step": 2751, "task_loss": 1.3062400817871094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7586166858673096, "epoch": 2.33, "learning_rate": 3.338364931771525e-05, "loss": 1.1026, "step": 2752, "task_loss": 1.0915030241012573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32363593578338623, "epoch": 2.33, "learning_rate": 3.3377611399589426e-05, "loss": 0.6981, "step": 2753, "task_loss": 0.5771260857582092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0949854850769043, "epoch": 2.33, "learning_rate": 3.3371573481463594e-05, "loss": 1.0975, "step": 2754, "task_loss": 1.0440804958343506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2271556854248047, "epoch": 2.33, "learning_rate": 3.336553556333776e-05, "loss": 1.1242, "step": 2755, "task_loss": 1.2121460437774658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7287555932998657, "epoch": 2.33, "learning_rate": 3.3359497645211935e-05, "loss": 0.9124, "step": 2756, "task_loss": 0.6143949627876282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1714069843292236, "epoch": 2.33, "learning_rate": 3.33534597270861e-05, "loss": 1.0478, "step": 2757, "task_loss": 1.3940870761871338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5877744555473328, "epoch": 2.33, "learning_rate": 3.334742180896027e-05, "loss": 0.8255, "step": 2758, "task_loss": 0.4478219151496887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2909646034240723, "epoch": 2.33, "learning_rate": 3.334138389083444e-05, "loss": 1.0216, "step": 2759, "task_loss": 1.5894781351089478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8579210638999939, "epoch": 2.33, "learning_rate": 3.333534597270861e-05, "loss": 0.923, "step": 2760, "task_loss": 1.1583107709884644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9678686857223511, "epoch": 2.33, "learning_rate": 3.332930805458278e-05, "loss": 1.0387, "step": 2761, "task_loss": 0.7029757499694824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2998355627059937, "epoch": 2.33, "learning_rate": 3.332327013645695e-05, "loss": 1.0126, "step": 2762, "task_loss": 1.3968712091445923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.929969310760498, "epoch": 2.34, "learning_rate": 3.3317232218331125e-05, "loss": 0.8186, "step": 2763, "task_loss": 1.5328550338745117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.581977367401123, "epoch": 2.34, "learning_rate": 3.331119430020529e-05, "loss": 0.6632, "step": 2764, "task_loss": 0.7838281989097595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9163552522659302, "epoch": 2.34, "learning_rate": 3.330515638207946e-05, "loss": 0.7615, "step": 2765, "task_loss": 0.7887030243873596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6203932762145996, "epoch": 2.34, "learning_rate": 3.3299118463953634e-05, "loss": 0.7808, "step": 2766, "task_loss": 1.2968274354934692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.635939359664917, "epoch": 2.34, "learning_rate": 3.32930805458278e-05, "loss": 0.8846, "step": 2767, "task_loss": 0.4422922134399414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8960785865783691, "epoch": 2.34, "learning_rate": 3.328704262770197e-05, "loss": 1.0047, "step": 2768, "task_loss": 0.4507089853286743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5274184942245483, "epoch": 2.34, "learning_rate": 3.328100470957614e-05, "loss": 0.8729, "step": 2769, "task_loss": 0.44616588950157166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0400034189224243, "epoch": 2.34, "learning_rate": 3.327496679145031e-05, "loss": 0.7517, "step": 2770, "task_loss": 0.9686523079872131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0427360534667969, "epoch": 2.34, "learning_rate": 3.3268928873324476e-05, "loss": 0.9794, "step": 2771, "task_loss": 0.4361608028411865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0210022926330566, "epoch": 2.34, "learning_rate": 3.326289095519865e-05, "loss": 0.9857, "step": 2772, "task_loss": 1.7914854288101196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4057784080505371, "epoch": 2.34, "learning_rate": 3.3256853037072824e-05, "loss": 0.8616, "step": 2773, "task_loss": 0.41765663027763367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8480380773544312, "epoch": 2.34, "learning_rate": 3.3250815118946985e-05, "loss": 0.8342, "step": 2774, "task_loss": 0.8104041814804077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9782816171646118, "epoch": 2.35, "learning_rate": 3.324477720082116e-05, "loss": 0.9393, "step": 2775, "task_loss": 0.8064548969268799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6400340795516968, "epoch": 2.35, "learning_rate": 3.323873928269533e-05, "loss": 0.9706, "step": 2776, "task_loss": 0.8559364080429077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6473779678344727, "epoch": 2.35, "learning_rate": 3.323270136456949e-05, "loss": 0.6339, "step": 2777, "task_loss": 0.7535467147827148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8532047867774963, "epoch": 2.35, "learning_rate": 3.322666344644367e-05, "loss": 0.675, "step": 2778, "task_loss": 0.46080926060676575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.70982825756073, "epoch": 2.35, "learning_rate": 3.322062552831784e-05, "loss": 0.8629, "step": 2779, "task_loss": 1.0623927116394043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5893363952636719, "epoch": 2.35, "learning_rate": 3.321458761019201e-05, "loss": 0.9807, "step": 2780, "task_loss": 0.40249910950660706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8153378367424011, "epoch": 2.35, "learning_rate": 3.3208549692066175e-05, "loss": 0.8976, "step": 2781, "task_loss": 1.7063082456588745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9056700468063354, "epoch": 2.35, "learning_rate": 3.320251177394035e-05, "loss": 0.9575, "step": 2782, "task_loss": 0.9640542268753052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1873252391815186, "epoch": 2.35, "learning_rate": 3.3196473855814516e-05, "loss": 0.9412, "step": 2783, "task_loss": 1.4302915334701538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0045500993728638, "epoch": 2.35, "learning_rate": 3.3190435937688684e-05, "loss": 0.7086, "step": 2784, "task_loss": 1.048970341682434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2279248237609863, "epoch": 2.35, "learning_rate": 3.318439801956286e-05, "loss": 1.2548, "step": 2785, "task_loss": 0.9453883171081543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2117135524749756, "epoch": 2.35, "learning_rate": 3.317836010143703e-05, "loss": 1.3307, "step": 2786, "task_loss": 1.372259259223938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8918375968933105, "epoch": 2.36, "learning_rate": 3.317232218331119e-05, "loss": 1.0388, "step": 2787, "task_loss": 1.1482336521148682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9327211380004883, "epoch": 2.36, "learning_rate": 3.3166284265185366e-05, "loss": 0.796, "step": 2788, "task_loss": 1.1190260648727417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.742501974105835, "epoch": 2.36, "learning_rate": 3.316024634705954e-05, "loss": 1.0226, "step": 2789, "task_loss": 0.8503888845443726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5036033391952515, "epoch": 2.36, "learning_rate": 3.31542084289337e-05, "loss": 0.9159, "step": 2790, "task_loss": 0.5288277268409729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7272175550460815, "epoch": 2.36, "learning_rate": 3.3148170510807874e-05, "loss": 0.9801, "step": 2791, "task_loss": 0.8269513845443726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.867139458656311, "epoch": 2.36, "learning_rate": 3.314213259268205e-05, "loss": 0.9429, "step": 2792, "task_loss": 0.5630469918251038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3334500789642334, "epoch": 2.36, "learning_rate": 3.3136094674556215e-05, "loss": 0.9502, "step": 2793, "task_loss": 1.378792643547058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7074646949768066, "epoch": 2.36, "learning_rate": 3.313005675643038e-05, "loss": 1.0753, "step": 2794, "task_loss": 0.49129319190979004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5991054773330688, "epoch": 2.36, "learning_rate": 3.3124018838304557e-05, "loss": 0.9418, "step": 2795, "task_loss": 0.47627317905426025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4860683679580688, "epoch": 2.36, "learning_rate": 3.3117980920178724e-05, "loss": 1.1168, "step": 2796, "task_loss": 1.0081244707107544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8480997085571289, "epoch": 2.36, "learning_rate": 3.311194300205289e-05, "loss": 0.8715, "step": 2797, "task_loss": 1.1260061264038086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2352893352508545, "epoch": 2.36, "learning_rate": 3.3105905083927065e-05, "loss": 0.8859, "step": 2798, "task_loss": 0.44881337881088257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6530822515487671, "epoch": 2.37, "learning_rate": 3.309986716580123e-05, "loss": 0.8176, "step": 2799, "task_loss": 1.0597397089004517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2304600477218628, "epoch": 2.37, "learning_rate": 3.30938292476754e-05, "loss": 1.0072, "step": 2800, "task_loss": 1.1774234771728516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8958079814910889, "epoch": 2.37, "learning_rate": 3.308779132954957e-05, "loss": 0.7998, "step": 2801, "task_loss": 0.8911529183387756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1642166376113892, "epoch": 2.37, "learning_rate": 3.308175341142374e-05, "loss": 0.8822, "step": 2802, "task_loss": 1.0235081911087036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.834490180015564, "epoch": 2.37, "learning_rate": 3.3075715493297914e-05, "loss": 0.8435, "step": 2803, "task_loss": 0.9120036363601685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1348217725753784, "epoch": 2.37, "learning_rate": 3.306967757517208e-05, "loss": 0.8852, "step": 2804, "task_loss": 1.1849141120910645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8975103497505188, "epoch": 2.37, "learning_rate": 3.3063639657046255e-05, "loss": 0.9898, "step": 2805, "task_loss": 1.3823573589324951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5769085884094238, "epoch": 2.37, "learning_rate": 3.305760173892042e-05, "loss": 0.8847, "step": 2806, "task_loss": 0.6307712197303772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8605297207832336, "epoch": 2.37, "learning_rate": 3.305156382079459e-05, "loss": 0.9355, "step": 2807, "task_loss": 0.8241601586341858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.180167555809021, "epoch": 2.37, "learning_rate": 3.3045525902668764e-05, "loss": 0.7468, "step": 2808, "task_loss": 1.04692542552948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8402324318885803, "epoch": 2.37, "learning_rate": 3.303948798454293e-05, "loss": 0.7038, "step": 2809, "task_loss": 0.6949109435081482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7122187614440918, "epoch": 2.38, "learning_rate": 3.30334500664171e-05, "loss": 0.909, "step": 2810, "task_loss": 0.7559311985969543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7848840951919556, "epoch": 2.38, "learning_rate": 3.302741214829127e-05, "loss": 0.992, "step": 2811, "task_loss": 0.6812652945518494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1048786640167236, "epoch": 2.38, "learning_rate": 3.302137423016544e-05, "loss": 0.8967, "step": 2812, "task_loss": 0.9271783828735352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5336031913757324, "epoch": 2.38, "learning_rate": 3.301533631203961e-05, "loss": 0.8999, "step": 2813, "task_loss": 1.6040239334106445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5380577445030212, "epoch": 2.38, "learning_rate": 3.300929839391378e-05, "loss": 0.8099, "step": 2814, "task_loss": 0.14455053210258484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7060345411300659, "epoch": 2.38, "learning_rate": 3.300326047578795e-05, "loss": 0.9763, "step": 2815, "task_loss": 0.6798547506332397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2111506462097168, "epoch": 2.38, "learning_rate": 3.299722255766212e-05, "loss": 0.9161, "step": 2816, "task_loss": 1.1247963905334473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1909908056259155, "epoch": 2.38, "learning_rate": 3.299118463953629e-05, "loss": 0.7299, "step": 2817, "task_loss": 1.0985205173492432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4357483386993408, "epoch": 2.38, "learning_rate": 3.2985146721410456e-05, "loss": 0.5673, "step": 2818, "task_loss": 0.37735363841056824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5549491047859192, "epoch": 2.38, "learning_rate": 3.297910880328463e-05, "loss": 0.6768, "step": 2819, "task_loss": 0.6331230998039246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8248299360275269, "epoch": 2.38, "learning_rate": 3.29730708851588e-05, "loss": 0.6741, "step": 2820, "task_loss": 1.3869222402572632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.961200475692749, "epoch": 2.38, "learning_rate": 3.296703296703297e-05, "loss": 0.9932, "step": 2821, "task_loss": 0.5817019939422607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.923088788986206, "epoch": 2.39, "learning_rate": 3.296099504890714e-05, "loss": 1.0372, "step": 2822, "task_loss": 1.1444703340530396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6645529270172119, "epoch": 2.39, "learning_rate": 3.2954957130781305e-05, "loss": 0.8437, "step": 2823, "task_loss": 1.094321608543396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7674187421798706, "epoch": 2.39, "learning_rate": 3.294891921265548e-05, "loss": 0.8493, "step": 2824, "task_loss": 0.5415501594543457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5413985252380371, "epoch": 2.39, "learning_rate": 3.2942881294529647e-05, "loss": 0.8957, "step": 2825, "task_loss": 0.619649350643158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8950704336166382, "epoch": 2.39, "learning_rate": 3.293684337640382e-05, "loss": 0.8396, "step": 2826, "task_loss": 1.071526288986206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.696370244026184, "epoch": 2.39, "learning_rate": 3.293080545827799e-05, "loss": 1.3135, "step": 2827, "task_loss": 1.1475722789764404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6022390723228455, "epoch": 2.39, "learning_rate": 3.2924767540152155e-05, "loss": 0.7315, "step": 2828, "task_loss": 0.7150059342384338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.560997724533081, "epoch": 2.39, "learning_rate": 3.291872962202633e-05, "loss": 0.822, "step": 2829, "task_loss": 1.1898075342178345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0872727632522583, "epoch": 2.39, "learning_rate": 3.2912691703900496e-05, "loss": 1.0204, "step": 2830, "task_loss": 0.6172168850898743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.04429030418396, "epoch": 2.39, "learning_rate": 3.290665378577466e-05, "loss": 0.7524, "step": 2831, "task_loss": 0.36200955510139465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.831575870513916, "epoch": 2.39, "learning_rate": 3.290061586764884e-05, "loss": 1.0549, "step": 2832, "task_loss": 0.4593145251274109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6238629221916199, "epoch": 2.39, "learning_rate": 3.2894577949523004e-05, "loss": 0.6351, "step": 2833, "task_loss": 1.0745619535446167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8361895680427551, "epoch": 2.4, "learning_rate": 3.288854003139717e-05, "loss": 1.0352, "step": 2834, "task_loss": 0.8293092250823975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7514456510543823, "epoch": 2.4, "learning_rate": 3.2882502113271346e-05, "loss": 0.8595, "step": 2835, "task_loss": 0.6336367130279541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1775598526000977, "epoch": 2.4, "learning_rate": 3.287646419514552e-05, "loss": 0.9379, "step": 2836, "task_loss": 1.566910743713379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6616877913475037, "epoch": 2.4, "learning_rate": 3.287042627701969e-05, "loss": 0.7718, "step": 2837, "task_loss": 0.7921245694160461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6777482032775879, "epoch": 2.4, "learning_rate": 3.2864388358893854e-05, "loss": 0.8376, "step": 2838, "task_loss": 0.4684142768383026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.862629234790802, "epoch": 2.4, "learning_rate": 3.285835044076803e-05, "loss": 0.7408, "step": 2839, "task_loss": 0.6321136951446533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7331017255783081, "epoch": 2.4, "learning_rate": 3.2852312522642195e-05, "loss": 1.0422, "step": 2840, "task_loss": 1.3112554550170898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6967334747314453, "epoch": 2.4, "learning_rate": 3.284627460451636e-05, "loss": 0.7056, "step": 2841, "task_loss": 1.161782145500183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8684355020523071, "epoch": 2.4, "learning_rate": 3.2840236686390536e-05, "loss": 0.7655, "step": 2842, "task_loss": 0.7267280220985413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9047896862030029, "epoch": 2.4, "learning_rate": 3.28341987682647e-05, "loss": 0.9178, "step": 2843, "task_loss": 0.7912650108337402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5731492042541504, "epoch": 2.4, "learning_rate": 3.282816085013887e-05, "loss": 0.9212, "step": 2844, "task_loss": 0.3461819291114807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8881270885467529, "epoch": 2.4, "learning_rate": 3.2822122932013044e-05, "loss": 0.755, "step": 2845, "task_loss": 0.5680453777313232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6593795418739319, "epoch": 2.41, "learning_rate": 3.281608501388722e-05, "loss": 0.9017, "step": 2846, "task_loss": 0.3043040633201599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8972082734107971, "epoch": 2.41, "learning_rate": 3.281004709576138e-05, "loss": 0.9019, "step": 2847, "task_loss": 0.9299072623252869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2110645771026611, "epoch": 2.41, "learning_rate": 3.280400917763555e-05, "loss": 1.0781, "step": 2848, "task_loss": 0.7348819971084595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9715776443481445, "epoch": 2.41, "learning_rate": 3.279797125950973e-05, "loss": 1.1793, "step": 2849, "task_loss": 0.541412889957428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2992650270462036, "epoch": 2.41, "learning_rate": 3.279193334138389e-05, "loss": 0.9831, "step": 2850, "task_loss": 0.9367376565933228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9535198211669922, "epoch": 2.41, "learning_rate": 3.278589542325806e-05, "loss": 0.8742, "step": 2851, "task_loss": 0.28230205178260803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7684029340744019, "epoch": 2.41, "learning_rate": 3.2779857505132235e-05, "loss": 0.827, "step": 2852, "task_loss": 1.345329761505127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5055259466171265, "epoch": 2.41, "learning_rate": 3.27738195870064e-05, "loss": 0.8198, "step": 2853, "task_loss": 0.12649044394493103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8105188012123108, "epoch": 2.41, "learning_rate": 3.276778166888057e-05, "loss": 0.9507, "step": 2854, "task_loss": 0.6093190312385559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2786235809326172, "epoch": 2.41, "learning_rate": 3.276174375075474e-05, "loss": 1.0195, "step": 2855, "task_loss": 2.1092777252197266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7351697683334351, "epoch": 2.41, "learning_rate": 3.275570583262891e-05, "loss": 1.0528, "step": 2856, "task_loss": 0.9971580505371094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5019831657409668, "epoch": 2.41, "learning_rate": 3.274966791450308e-05, "loss": 0.8468, "step": 2857, "task_loss": 1.1405714750289917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2786078453063965, "epoch": 2.42, "learning_rate": 3.274362999637725e-05, "loss": 0.9961, "step": 2858, "task_loss": 1.215188980102539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0205318927764893, "epoch": 2.42, "learning_rate": 3.273759207825142e-05, "loss": 0.8504, "step": 2859, "task_loss": 0.5503911375999451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4503839313983917, "epoch": 2.42, "learning_rate": 3.2731554160125586e-05, "loss": 0.7484, "step": 2860, "task_loss": 0.6186447143554688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.034995436668396, "epoch": 2.42, "learning_rate": 3.272551624199976e-05, "loss": 0.8673, "step": 2861, "task_loss": 1.4720786809921265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6631806492805481, "epoch": 2.42, "learning_rate": 3.2719478323873934e-05, "loss": 0.7874, "step": 2862, "task_loss": 0.44145673513412476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6990792155265808, "epoch": 2.42, "learning_rate": 3.2713440405748094e-05, "loss": 0.947, "step": 2863, "task_loss": 1.4039933681488037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6229379177093506, "epoch": 2.42, "learning_rate": 3.270740248762227e-05, "loss": 0.6284, "step": 2864, "task_loss": 0.46257296204566956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5364512205123901, "epoch": 2.42, "learning_rate": 3.270136456949644e-05, "loss": 0.8307, "step": 2865, "task_loss": 0.2517240345478058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0853326320648193, "epoch": 2.42, "learning_rate": 3.269532665137061e-05, "loss": 0.9419, "step": 2866, "task_loss": 1.204682469367981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6977158784866333, "epoch": 2.42, "learning_rate": 3.268928873324478e-05, "loss": 0.9476, "step": 2867, "task_loss": 0.3645823001861572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6663147211074829, "epoch": 2.42, "learning_rate": 3.268325081511895e-05, "loss": 0.7112, "step": 2868, "task_loss": 0.8512425422668457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6107907295227051, "epoch": 2.42, "learning_rate": 3.267721289699312e-05, "loss": 0.6773, "step": 2869, "task_loss": 0.41885432600975037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2693182229995728, "epoch": 2.43, "learning_rate": 3.2671174978867285e-05, "loss": 0.9869, "step": 2870, "task_loss": 0.635145366191864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6516168117523193, "epoch": 2.43, "learning_rate": 3.266513706074146e-05, "loss": 0.9784, "step": 2871, "task_loss": 1.0750597715377808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4344112873077393, "epoch": 2.43, "learning_rate": 3.2659099142615626e-05, "loss": 0.9266, "step": 2872, "task_loss": 1.3012166023254395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49300891160964966, "epoch": 2.43, "learning_rate": 3.265306122448979e-05, "loss": 0.6958, "step": 2873, "task_loss": 0.711258590221405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4768095016479492, "epoch": 2.43, "learning_rate": 3.264702330636397e-05, "loss": 0.8277, "step": 2874, "task_loss": 0.2731589376926422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1137490272521973, "epoch": 2.43, "learning_rate": 3.2640985388238134e-05, "loss": 1.0096, "step": 2875, "task_loss": 1.9950792789459229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5181125402450562, "epoch": 2.43, "learning_rate": 3.263494747011231e-05, "loss": 0.8841, "step": 2876, "task_loss": 0.7141669392585754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9992743730545044, "epoch": 2.43, "learning_rate": 3.2628909551986476e-05, "loss": 0.7069, "step": 2877, "task_loss": 0.7759259343147278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7502714395523071, "epoch": 2.43, "learning_rate": 3.262287163386065e-05, "loss": 0.8964, "step": 2878, "task_loss": 1.2772576808929443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8539506793022156, "epoch": 2.43, "learning_rate": 3.261683371573482e-05, "loss": 0.7983, "step": 2879, "task_loss": 0.4517708420753479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0385268926620483, "epoch": 2.43, "learning_rate": 3.2610795797608984e-05, "loss": 0.8242, "step": 2880, "task_loss": 1.0699708461761475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9664639234542847, "epoch": 2.44, "learning_rate": 3.260475787948316e-05, "loss": 0.9845, "step": 2881, "task_loss": 0.5256713032722473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1014221906661987, "epoch": 2.44, "learning_rate": 3.2598719961357325e-05, "loss": 1.0038, "step": 2882, "task_loss": 0.7624884247779846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.132041573524475, "epoch": 2.44, "learning_rate": 3.259268204323149e-05, "loss": 0.736, "step": 2883, "task_loss": 0.7164139151573181 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49275892972946167, "epoch": 2.44, "learning_rate": 3.2586644125105666e-05, "loss": 0.7155, "step": 2884, "task_loss": 0.17327173054218292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.59247624874115, "epoch": 2.44, "learning_rate": 3.2580606206979833e-05, "loss": 0.9866, "step": 2885, "task_loss": 1.4928157329559326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7771472334861755, "epoch": 2.44, "learning_rate": 3.257456828885401e-05, "loss": 1.2261, "step": 2886, "task_loss": 0.9209288954734802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2777198255062103, "epoch": 2.44, "learning_rate": 3.2568530370728175e-05, "loss": 0.7097, "step": 2887, "task_loss": 0.3477313220500946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48255491256713867, "epoch": 2.44, "learning_rate": 3.256249245260234e-05, "loss": 0.7836, "step": 2888, "task_loss": 0.5507585406303406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7601661682128906, "epoch": 2.44, "learning_rate": 3.2556454534476516e-05, "loss": 0.8158, "step": 2889, "task_loss": 0.4794238805770874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6322106719017029, "epoch": 2.44, "learning_rate": 3.255041661635068e-05, "loss": 0.755, "step": 2890, "task_loss": 0.31476354598999023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4842289984226227, "epoch": 2.44, "learning_rate": 3.254437869822485e-05, "loss": 0.6458, "step": 2891, "task_loss": 0.18893949687480927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7494486570358276, "epoch": 2.44, "learning_rate": 3.2538340780099024e-05, "loss": 1.0072, "step": 2892, "task_loss": 0.28199902176856995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.688312292098999, "epoch": 2.45, "learning_rate": 3.253230286197319e-05, "loss": 0.7717, "step": 2893, "task_loss": 0.3075966536998749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6612006425857544, "epoch": 2.45, "learning_rate": 3.2526264943847365e-05, "loss": 0.6545, "step": 2894, "task_loss": 0.6770530939102173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.6166071891784668, "epoch": 2.45, "learning_rate": 3.252022702572153e-05, "loss": 1.0283, "step": 2895, "task_loss": 0.7821600437164307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9795551300048828, "epoch": 2.45, "learning_rate": 3.2514189107595706e-05, "loss": 0.76, "step": 2896, "task_loss": 0.22820217907428741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3775837421417236, "epoch": 2.45, "learning_rate": 3.2508151189469873e-05, "loss": 0.9753, "step": 2897, "task_loss": 1.1048611402511597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7362343072891235, "epoch": 2.45, "learning_rate": 3.250211327134404e-05, "loss": 1.0185, "step": 2898, "task_loss": 0.7415258288383484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6338307857513428, "epoch": 2.45, "learning_rate": 3.2496075353218215e-05, "loss": 0.7083, "step": 2899, "task_loss": 0.5021026134490967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7585917711257935, "epoch": 2.45, "learning_rate": 3.249003743509238e-05, "loss": 1.0034, "step": 2900, "task_loss": 0.4196048676967621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.132533311843872, "epoch": 2.45, "learning_rate": 3.248399951696655e-05, "loss": 1.0083, "step": 2901, "task_loss": 1.1303118467330933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.837515652179718, "epoch": 2.45, "learning_rate": 3.247796159884072e-05, "loss": 0.8018, "step": 2902, "task_loss": 0.6076485514640808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0906003713607788, "epoch": 2.45, "learning_rate": 3.247192368071489e-05, "loss": 0.796, "step": 2903, "task_loss": 0.8274542689323425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7722394466400146, "epoch": 2.45, "learning_rate": 3.246588576258906e-05, "loss": 1.0006, "step": 2904, "task_loss": 0.727177619934082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3603975772857666, "epoch": 2.46, "learning_rate": 3.245984784446323e-05, "loss": 0.9392, "step": 2905, "task_loss": 1.7391974925994873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4708527326583862, "epoch": 2.46, "learning_rate": 3.2453809926337405e-05, "loss": 1.1332, "step": 2906, "task_loss": 1.1995388269424438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6748366355895996, "epoch": 2.46, "learning_rate": 3.2447772008211566e-05, "loss": 0.9604, "step": 2907, "task_loss": 1.1734504699707031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5978589057922363, "epoch": 2.46, "learning_rate": 3.244173409008574e-05, "loss": 0.8322, "step": 2908, "task_loss": 0.5152594447135925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0617574453353882, "epoch": 2.46, "learning_rate": 3.2435696171959914e-05, "loss": 1.09, "step": 2909, "task_loss": 1.6811085939407349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.708771824836731, "epoch": 2.46, "learning_rate": 3.242965825383408e-05, "loss": 0.8687, "step": 2910, "task_loss": 1.0094149112701416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6269038915634155, "epoch": 2.46, "learning_rate": 3.242362033570825e-05, "loss": 0.7852, "step": 2911, "task_loss": 0.4934190809726715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0415931940078735, "epoch": 2.46, "learning_rate": 3.241758241758242e-05, "loss": 0.8899, "step": 2912, "task_loss": 0.5737878084182739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.421137809753418, "epoch": 2.46, "learning_rate": 3.241154449945659e-05, "loss": 1.1366, "step": 2913, "task_loss": 0.7453801035881042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9945685863494873, "epoch": 2.46, "learning_rate": 3.2405506581330756e-05, "loss": 1.0365, "step": 2914, "task_loss": 1.2096803188323975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9649375081062317, "epoch": 2.46, "learning_rate": 3.239946866320493e-05, "loss": 0.7832, "step": 2915, "task_loss": 1.0220212936401367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9059720635414124, "epoch": 2.46, "learning_rate": 3.2393430745079104e-05, "loss": 1.0342, "step": 2916, "task_loss": 1.1540971994400024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7540695071220398, "epoch": 2.47, "learning_rate": 3.2387392826953265e-05, "loss": 0.9253, "step": 2917, "task_loss": 0.6525171995162964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8361202478408813, "epoch": 2.47, "learning_rate": 3.238135490882744e-05, "loss": 0.8631, "step": 2918, "task_loss": 0.369408518075943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7345149517059326, "epoch": 2.47, "learning_rate": 3.237531699070161e-05, "loss": 0.8672, "step": 2919, "task_loss": 0.14087851345539093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8696631193161011, "epoch": 2.47, "learning_rate": 3.236927907257577e-05, "loss": 0.9238, "step": 2920, "task_loss": 0.7390062808990479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4212777614593506, "epoch": 2.47, "learning_rate": 3.236324115444995e-05, "loss": 0.8344, "step": 2921, "task_loss": 0.5525619387626648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6521879434585571, "epoch": 2.47, "learning_rate": 3.235720323632412e-05, "loss": 0.8863, "step": 2922, "task_loss": 0.7836416959762573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7679495811462402, "epoch": 2.47, "learning_rate": 3.235116531819828e-05, "loss": 0.8288, "step": 2923, "task_loss": 0.7002469897270203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4390490651130676, "epoch": 2.47, "learning_rate": 3.2345127400072455e-05, "loss": 0.8028, "step": 2924, "task_loss": 1.423789381980896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5713815093040466, "epoch": 2.47, "learning_rate": 3.233908948194663e-05, "loss": 0.8462, "step": 2925, "task_loss": 0.6427159309387207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4166971445083618, "epoch": 2.47, "learning_rate": 3.2333051563820796e-05, "loss": 0.655, "step": 2926, "task_loss": 1.1895174980163574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8471949100494385, "epoch": 2.47, "learning_rate": 3.2327013645694964e-05, "loss": 0.834, "step": 2927, "task_loss": 0.794459879398346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6519914269447327, "epoch": 2.47, "learning_rate": 3.232097572756914e-05, "loss": 0.7574, "step": 2928, "task_loss": 0.8581552505493164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5764175653457642, "epoch": 2.48, "learning_rate": 3.2314937809443305e-05, "loss": 0.9341, "step": 2929, "task_loss": 0.5125983357429504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27868857979774475, "epoch": 2.48, "learning_rate": 3.230889989131747e-05, "loss": 0.6759, "step": 2930, "task_loss": 0.6560837626457214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6543096303939819, "epoch": 2.48, "learning_rate": 3.2302861973191646e-05, "loss": 0.6799, "step": 2931, "task_loss": 0.20707793533802032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.816749632358551, "epoch": 2.48, "learning_rate": 3.229682405506581e-05, "loss": 0.9246, "step": 2932, "task_loss": 0.8930086493492126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7288960218429565, "epoch": 2.48, "learning_rate": 3.229078613693998e-05, "loss": 0.7857, "step": 2933, "task_loss": 0.42078742384910583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6835066080093384, "epoch": 2.48, "learning_rate": 3.2284748218814154e-05, "loss": 0.8932, "step": 2934, "task_loss": 1.129055142402649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4528479278087616, "epoch": 2.48, "learning_rate": 3.227871030068833e-05, "loss": 0.6802, "step": 2935, "task_loss": 0.05345381051301956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.053920030593872, "epoch": 2.48, "learning_rate": 3.2272672382562495e-05, "loss": 0.8972, "step": 2936, "task_loss": 0.4809158742427826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9291329979896545, "epoch": 2.48, "learning_rate": 3.226663446443666e-05, "loss": 0.9904, "step": 2937, "task_loss": 0.6103377938270569 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6649560332298279, "epoch": 2.48, "learning_rate": 3.2260596546310836e-05, "loss": 0.7974, "step": 2938, "task_loss": 0.2971968650817871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2891366481781006, "epoch": 2.48, "learning_rate": 3.2254558628185004e-05, "loss": 1.0864, "step": 2939, "task_loss": 1.5032516717910767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9032121896743774, "epoch": 2.48, "learning_rate": 3.224852071005917e-05, "loss": 0.8203, "step": 2940, "task_loss": 0.8152036070823669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.300257682800293, "epoch": 2.49, "learning_rate": 3.2242482791933345e-05, "loss": 0.9658, "step": 2941, "task_loss": 1.5385836362838745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1240324974060059, "epoch": 2.49, "learning_rate": 3.223644487380751e-05, "loss": 0.8849, "step": 2942, "task_loss": 1.1655923128128052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9554330110549927, "epoch": 2.49, "learning_rate": 3.223040695568168e-05, "loss": 0.9336, "step": 2943, "task_loss": 0.6401989459991455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8290506601333618, "epoch": 2.49, "learning_rate": 3.222436903755585e-05, "loss": 0.9738, "step": 2944, "task_loss": 0.9150649309158325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1051589250564575, "epoch": 2.49, "learning_rate": 3.221833111943002e-05, "loss": 0.8357, "step": 2945, "task_loss": 0.9484920501708984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5697426199913025, "epoch": 2.49, "learning_rate": 3.2212293201304194e-05, "loss": 0.919, "step": 2946, "task_loss": 1.0301179885864258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9359353184700012, "epoch": 2.49, "learning_rate": 3.220625528317836e-05, "loss": 0.9764, "step": 2947, "task_loss": 1.0430269241333008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7498339414596558, "epoch": 2.49, "learning_rate": 3.220021736505253e-05, "loss": 0.779, "step": 2948, "task_loss": 0.6550315618515015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0643386840820312, "epoch": 2.49, "learning_rate": 3.21941794469267e-05, "loss": 0.9679, "step": 2949, "task_loss": 1.3420933485031128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7117494940757751, "epoch": 2.49, "learning_rate": 3.218814152880087e-05, "loss": 0.8809, "step": 2950, "task_loss": 0.6326349973678589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7402175664901733, "epoch": 2.49, "learning_rate": 3.2182103610675044e-05, "loss": 0.7969, "step": 2951, "task_loss": 0.5275585651397705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5870144963264465, "epoch": 2.5, "learning_rate": 3.217606569254921e-05, "loss": 0.7889, "step": 2952, "task_loss": 1.1345893144607544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.132422685623169, "epoch": 2.5, "learning_rate": 3.217002777442338e-05, "loss": 1.0642, "step": 2953, "task_loss": 0.45857974886894226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6052035689353943, "epoch": 2.5, "learning_rate": 3.216398985629755e-05, "loss": 0.7485, "step": 2954, "task_loss": 0.18246588110923767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.745585560798645, "epoch": 2.5, "learning_rate": 3.215795193817172e-05, "loss": 0.8852, "step": 2955, "task_loss": 1.3094991445541382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9829490780830383, "epoch": 2.5, "learning_rate": 3.215191402004589e-05, "loss": 1.1625, "step": 2956, "task_loss": 0.8204315900802612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4937748908996582, "epoch": 2.5, "learning_rate": 3.214587610192006e-05, "loss": 1.0725, "step": 2957, "task_loss": 0.8999999761581421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6362663507461548, "epoch": 2.5, "learning_rate": 3.213983818379423e-05, "loss": 0.7003, "step": 2958, "task_loss": 0.8550742864608765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5605192184448242, "epoch": 2.5, "learning_rate": 3.21338002656684e-05, "loss": 0.5748, "step": 2959, "task_loss": 0.22620804607868195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6340289115905762, "epoch": 2.5, "learning_rate": 3.212776234754257e-05, "loss": 0.7998, "step": 2960, "task_loss": 0.37425655126571655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8747180700302124, "epoch": 2.5, "learning_rate": 3.2121724429416736e-05, "loss": 0.8811, "step": 2961, "task_loss": 0.8462705016136169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5913041830062866, "epoch": 2.5, "learning_rate": 3.211568651129091e-05, "loss": 0.6872, "step": 2962, "task_loss": 1.6385858058929443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.849287748336792, "epoch": 2.5, "learning_rate": 3.210964859316508e-05, "loss": 0.9858, "step": 2963, "task_loss": 0.7815146446228027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0486512184143066, "epoch": 2.51, "learning_rate": 3.2103610675039244e-05, "loss": 0.7977, "step": 2964, "task_loss": 0.8510364294052124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6807609796524048, "epoch": 2.51, "learning_rate": 3.209757275691342e-05, "loss": 0.774, "step": 2965, "task_loss": 0.4950663149356842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4242117702960968, "epoch": 2.51, "learning_rate": 3.209153483878759e-05, "loss": 0.5973, "step": 2966, "task_loss": 0.6834632754325867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39475733041763306, "epoch": 2.51, "learning_rate": 3.208549692066176e-05, "loss": 0.6665, "step": 2967, "task_loss": 0.7661213278770447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8595190644264221, "epoch": 2.51, "learning_rate": 3.2079459002535926e-05, "loss": 0.8313, "step": 2968, "task_loss": 0.6501460075378418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8260771036148071, "epoch": 2.51, "learning_rate": 3.20734210844101e-05, "loss": 0.789, "step": 2969, "task_loss": 0.9016792178153992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7676417231559753, "epoch": 2.51, "learning_rate": 3.206738316628427e-05, "loss": 0.7597, "step": 2970, "task_loss": 1.440747857093811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5636439919471741, "epoch": 2.51, "learning_rate": 3.2061345248158435e-05, "loss": 0.7299, "step": 2971, "task_loss": 0.2645760178565979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9017335176467896, "epoch": 2.51, "learning_rate": 3.205530733003261e-05, "loss": 0.7208, "step": 2972, "task_loss": 0.6191311478614807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7303603291511536, "epoch": 2.51, "learning_rate": 3.2049269411906776e-05, "loss": 1.0316, "step": 2973, "task_loss": 1.1697274446487427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2573659420013428, "epoch": 2.51, "learning_rate": 3.204323149378094e-05, "loss": 0.8891, "step": 2974, "task_loss": 0.6063340902328491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2797832489013672, "epoch": 2.51, "learning_rate": 3.203719357565512e-05, "loss": 1.065, "step": 2975, "task_loss": 1.8085486888885498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0743029117584229, "epoch": 2.52, "learning_rate": 3.203115565752929e-05, "loss": 0.9763, "step": 2976, "task_loss": 1.7480192184448242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.512091338634491, "epoch": 2.52, "learning_rate": 3.202511773940345e-05, "loss": 0.7786, "step": 2977, "task_loss": 0.24623432755470276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.981354832649231, "epoch": 2.52, "learning_rate": 3.2019079821277625e-05, "loss": 0.7434, "step": 2978, "task_loss": 0.6764379143714905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.519895315170288, "epoch": 2.52, "learning_rate": 3.20130419031518e-05, "loss": 0.9868, "step": 2979, "task_loss": 0.7029870748519897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.362274408340454, "epoch": 2.52, "learning_rate": 3.200700398502596e-05, "loss": 0.8779, "step": 2980, "task_loss": 1.2854820489883423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0162653923034668, "epoch": 2.52, "learning_rate": 3.2000966066900134e-05, "loss": 0.9447, "step": 2981, "task_loss": 0.4324296712875366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.697432279586792, "epoch": 2.52, "learning_rate": 3.199492814877431e-05, "loss": 0.8042, "step": 2982, "task_loss": 0.09608100354671478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0489168167114258, "epoch": 2.52, "learning_rate": 3.1988890230648475e-05, "loss": 0.9978, "step": 2983, "task_loss": 0.8962480425834656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8684440851211548, "epoch": 2.52, "learning_rate": 3.198285231252264e-05, "loss": 0.8753, "step": 2984, "task_loss": 0.7597934603691101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9698823690414429, "epoch": 2.52, "learning_rate": 3.1976814394396816e-05, "loss": 0.7752, "step": 2985, "task_loss": 0.43564245104789734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7125217914581299, "epoch": 2.52, "learning_rate": 3.197077647627098e-05, "loss": 0.5691, "step": 2986, "task_loss": 0.5527481436729431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0032899379730225, "epoch": 2.52, "learning_rate": 3.196473855814515e-05, "loss": 0.86, "step": 2987, "task_loss": 1.0519012212753296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5853729248046875, "epoch": 2.53, "learning_rate": 3.1958700640019324e-05, "loss": 0.6881, "step": 2988, "task_loss": 0.742026150226593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9301421642303467, "epoch": 2.53, "learning_rate": 3.195266272189349e-05, "loss": 0.8811, "step": 2989, "task_loss": 0.524196445941925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6418254971504211, "epoch": 2.53, "learning_rate": 3.194662480376766e-05, "loss": 0.6905, "step": 2990, "task_loss": 0.4861406981945038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7434477806091309, "epoch": 2.53, "learning_rate": 3.194058688564183e-05, "loss": 0.8005, "step": 2991, "task_loss": 1.0192149877548218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6885080933570862, "epoch": 2.53, "learning_rate": 3.1934548967516007e-05, "loss": 1.0182, "step": 2992, "task_loss": 1.0171197652816772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.048293948173523, "epoch": 2.53, "learning_rate": 3.192851104939017e-05, "loss": 0.9432, "step": 2993, "task_loss": 0.9086986780166626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7131342887878418, "epoch": 2.53, "learning_rate": 3.192247313126434e-05, "loss": 0.6426, "step": 2994, "task_loss": 1.1337015628814697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2279751300811768, "epoch": 2.53, "learning_rate": 3.1916435213138515e-05, "loss": 0.7557, "step": 2995, "task_loss": 1.118836760520935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6218761205673218, "epoch": 2.53, "learning_rate": 3.1910397295012675e-05, "loss": 0.8212, "step": 2996, "task_loss": 0.9928039908409119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3336478471755981, "epoch": 2.53, "learning_rate": 3.190435937688685e-05, "loss": 0.7164, "step": 2997, "task_loss": 0.604114830493927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9010549187660217, "epoch": 2.53, "learning_rate": 3.189832145876102e-05, "loss": 0.8179, "step": 2998, "task_loss": 0.7371923923492432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9644243717193604, "epoch": 2.53, "learning_rate": 3.189228354063519e-05, "loss": 0.8855, "step": 2999, "task_loss": 0.88642418384552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0338208675384521, "epoch": 2.54, "learning_rate": 3.188624562250936e-05, "loss": 1.0236, "step": 3000, "task_loss": 0.3367408215999603 }, { "epoch": 2.54, "eval_accuracy": 0.8822574257425743, "eval_loss": 0.5016362071037292, "eval_runtime": 227.3911, "eval_samples_per_second": 111.042, "eval_steps_per_second": 0.871, "step": 3000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6137768030166626, "epoch": 2.54, "learning_rate": 3.188020770438353e-05, "loss": 0.7235, "step": 3001, "task_loss": 0.271555095911026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6915023326873779, "epoch": 2.54, "learning_rate": 3.18741697862577e-05, "loss": 0.8103, "step": 3002, "task_loss": 0.9231040477752686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.659912109375, "epoch": 2.54, "learning_rate": 3.1868131868131866e-05, "loss": 0.7704, "step": 3003, "task_loss": 0.9349153637886047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42083296179771423, "epoch": 2.54, "learning_rate": 3.186209395000604e-05, "loss": 0.8225, "step": 3004, "task_loss": 0.07340937852859497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5846946239471436, "epoch": 2.54, "learning_rate": 3.185605603188021e-05, "loss": 0.735, "step": 3005, "task_loss": 0.9627697467803955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1618759632110596, "epoch": 2.54, "learning_rate": 3.1850018113754374e-05, "loss": 1.0428, "step": 3006, "task_loss": 1.5028562545776367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.761713981628418, "epoch": 2.54, "learning_rate": 3.184398019562855e-05, "loss": 0.7175, "step": 3007, "task_loss": 0.6191214323043823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7332868576049805, "epoch": 2.54, "learning_rate": 3.183794227750272e-05, "loss": 0.709, "step": 3008, "task_loss": 0.5360862016677856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8282040357589722, "epoch": 2.54, "learning_rate": 3.183190435937689e-05, "loss": 0.7121, "step": 3009, "task_loss": 1.2541381120681763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6773777008056641, "epoch": 2.54, "learning_rate": 3.1825866441251057e-05, "loss": 0.7403, "step": 3010, "task_loss": 0.9211320877075195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8626466989517212, "epoch": 2.54, "learning_rate": 3.181982852312523e-05, "loss": 0.6791, "step": 3011, "task_loss": 0.8365532755851746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44061243534088135, "epoch": 2.55, "learning_rate": 3.18137906049994e-05, "loss": 0.4858, "step": 3012, "task_loss": 0.30462759733200073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5855493545532227, "epoch": 2.55, "learning_rate": 3.1807752686873565e-05, "loss": 0.7486, "step": 3013, "task_loss": 0.5436987280845642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.089611530303955, "epoch": 2.55, "learning_rate": 3.180171476874774e-05, "loss": 0.7826, "step": 3014, "task_loss": 0.9176214933395386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8924753665924072, "epoch": 2.55, "learning_rate": 3.1795676850621906e-05, "loss": 0.908, "step": 3015, "task_loss": 0.5962092876434326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.076035737991333, "epoch": 2.55, "learning_rate": 3.178963893249607e-05, "loss": 1.1465, "step": 3016, "task_loss": 1.4187004566192627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9886857867240906, "epoch": 2.55, "learning_rate": 3.178360101437025e-05, "loss": 1.2795, "step": 3017, "task_loss": 1.4709323644638062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9635641574859619, "epoch": 2.55, "learning_rate": 3.1777563096244414e-05, "loss": 0.7376, "step": 3018, "task_loss": 0.8006808757781982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.098738431930542, "epoch": 2.55, "learning_rate": 3.177152517811859e-05, "loss": 0.9761, "step": 3019, "task_loss": 1.2263888120651245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7483527660369873, "epoch": 2.55, "learning_rate": 3.1765487259992755e-05, "loss": 0.7067, "step": 3020, "task_loss": 0.7314642667770386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5958069562911987, "epoch": 2.55, "learning_rate": 3.175944934186692e-05, "loss": 0.733, "step": 3021, "task_loss": 0.6028617024421692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5792432427406311, "epoch": 2.55, "learning_rate": 3.17534114237411e-05, "loss": 0.8012, "step": 3022, "task_loss": 1.1551947593688965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5220740437507629, "epoch": 2.56, "learning_rate": 3.1747373505615264e-05, "loss": 0.8099, "step": 3023, "task_loss": 1.2454135417938232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6018650531768799, "epoch": 2.56, "learning_rate": 3.174133558748944e-05, "loss": 0.6439, "step": 3024, "task_loss": 0.16588714718818665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8027732372283936, "epoch": 2.56, "learning_rate": 3.1735297669363605e-05, "loss": 0.7643, "step": 3025, "task_loss": 0.8107626438140869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.727070152759552, "epoch": 2.56, "learning_rate": 3.172925975123777e-05, "loss": 0.8428, "step": 3026, "task_loss": 0.8587751388549805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7050327658653259, "epoch": 2.56, "learning_rate": 3.1723221833111946e-05, "loss": 0.7917, "step": 3027, "task_loss": 0.8078317642211914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5212845802307129, "epoch": 2.56, "learning_rate": 3.171718391498611e-05, "loss": 0.9311, "step": 3028, "task_loss": 0.25558459758758545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6377017498016357, "epoch": 2.56, "learning_rate": 3.171114599686029e-05, "loss": 0.8297, "step": 3029, "task_loss": 0.8167658448219299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8483272194862366, "epoch": 2.56, "learning_rate": 3.1705108078734454e-05, "loss": 0.6666, "step": 3030, "task_loss": 1.0536823272705078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47415128350257874, "epoch": 2.56, "learning_rate": 3.169907016060862e-05, "loss": 0.6084, "step": 3031, "task_loss": 0.6789657473564148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.128638505935669, "epoch": 2.56, "learning_rate": 3.1693032242482796e-05, "loss": 1.1184, "step": 3032, "task_loss": 1.1000689268112183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7621657252311707, "epoch": 2.56, "learning_rate": 3.168699432435696e-05, "loss": 0.8358, "step": 3033, "task_loss": 0.979108989238739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3162500858306885, "epoch": 2.56, "learning_rate": 3.168095640623113e-05, "loss": 1.0163, "step": 3034, "task_loss": 2.5295801162719727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7265914678573608, "epoch": 2.57, "learning_rate": 3.1674918488105304e-05, "loss": 0.8471, "step": 3035, "task_loss": 1.5632015466690063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7244623303413391, "epoch": 2.57, "learning_rate": 3.166888056997947e-05, "loss": 0.86, "step": 3036, "task_loss": 1.9515035152435303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5082241892814636, "epoch": 2.57, "learning_rate": 3.166284265185364e-05, "loss": 0.6857, "step": 3037, "task_loss": 0.49375560879707336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5705839395523071, "epoch": 2.57, "learning_rate": 3.165680473372781e-05, "loss": 0.5545, "step": 3038, "task_loss": 1.2205630540847778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9945900440216064, "epoch": 2.57, "learning_rate": 3.1650766815601986e-05, "loss": 0.7314, "step": 3039, "task_loss": 1.4323033094406128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7085701823234558, "epoch": 2.57, "learning_rate": 3.164472889747615e-05, "loss": 0.8374, "step": 3040, "task_loss": 1.307729721069336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.029331922531128, "epoch": 2.57, "learning_rate": 3.163869097935032e-05, "loss": 0.8646, "step": 3041, "task_loss": 1.0929069519042969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8046635389328003, "epoch": 2.57, "learning_rate": 3.1632653061224494e-05, "loss": 0.7035, "step": 3042, "task_loss": 1.3054965734481812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.9130637645721436, "epoch": 2.57, "learning_rate": 3.162661514309866e-05, "loss": 1.1315, "step": 3043, "task_loss": 1.776525616645813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7463591694831848, "epoch": 2.57, "learning_rate": 3.162057722497283e-05, "loss": 0.9202, "step": 3044, "task_loss": 0.7726787328720093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7884398698806763, "epoch": 2.57, "learning_rate": 3.1614539306847e-05, "loss": 0.9153, "step": 3045, "task_loss": 0.7687554359436035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7593898773193359, "epoch": 2.57, "learning_rate": 3.160850138872117e-05, "loss": 0.7302, "step": 3046, "task_loss": 0.28603827953338623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8466780185699463, "epoch": 2.58, "learning_rate": 3.160246347059534e-05, "loss": 0.7639, "step": 3047, "task_loss": 0.5504293441772461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9991329312324524, "epoch": 2.58, "learning_rate": 3.159642555246951e-05, "loss": 0.8233, "step": 3048, "task_loss": 1.758773922920227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8021063208580017, "epoch": 2.58, "learning_rate": 3.1590387634343685e-05, "loss": 0.8472, "step": 3049, "task_loss": 1.1081922054290771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.6507203578948975, "epoch": 2.58, "learning_rate": 3.1584349716217846e-05, "loss": 1.0514, "step": 3050, "task_loss": 0.5546426773071289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7957897782325745, "epoch": 2.58, "learning_rate": 3.157831179809202e-05, "loss": 0.7781, "step": 3051, "task_loss": 1.141638994216919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.5324211120605469, "epoch": 2.58, "learning_rate": 3.1572273879966193e-05, "loss": 0.9575, "step": 3052, "task_loss": 1.2046325206756592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45889613032341003, "epoch": 2.58, "learning_rate": 3.1566235961840354e-05, "loss": 0.7687, "step": 3053, "task_loss": 0.9140346646308899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4761022925376892, "epoch": 2.58, "learning_rate": 3.156019804371453e-05, "loss": 0.6024, "step": 3054, "task_loss": 0.8140660524368286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7592402696609497, "epoch": 2.58, "learning_rate": 3.15541601255887e-05, "loss": 1.0425, "step": 3055, "task_loss": 1.0749025344848633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4261313676834106, "epoch": 2.58, "learning_rate": 3.154812220746287e-05, "loss": 0.8604, "step": 3056, "task_loss": 1.2256821393966675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38518714904785156, "epoch": 2.58, "learning_rate": 3.1542084289337036e-05, "loss": 0.6433, "step": 3057, "task_loss": 0.4681018888950348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7753989696502686, "epoch": 2.58, "learning_rate": 3.153604637121121e-05, "loss": 0.7666, "step": 3058, "task_loss": 1.3992558717727661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23168912529945374, "epoch": 2.59, "learning_rate": 3.153000845308538e-05, "loss": 0.6235, "step": 3059, "task_loss": 0.07382439821958542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5133727788925171, "epoch": 2.59, "learning_rate": 3.1523970534959544e-05, "loss": 0.795, "step": 3060, "task_loss": 0.8130411505699158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.837613582611084, "epoch": 2.59, "learning_rate": 3.151793261683372e-05, "loss": 0.8778, "step": 3061, "task_loss": 0.8012933135032654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7988501787185669, "epoch": 2.59, "learning_rate": 3.1511894698707886e-05, "loss": 0.8781, "step": 3062, "task_loss": 1.2246960401535034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5927332639694214, "epoch": 2.59, "learning_rate": 3.150585678058205e-05, "loss": 0.8893, "step": 3063, "task_loss": 1.0391918420791626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3367778062820435, "epoch": 2.59, "learning_rate": 3.149981886245623e-05, "loss": 1.1085, "step": 3064, "task_loss": 1.1869428157806396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.576911211013794, "epoch": 2.59, "learning_rate": 3.14937809443304e-05, "loss": 1.0024, "step": 3065, "task_loss": 1.638088583946228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9077416062355042, "epoch": 2.59, "learning_rate": 3.148774302620456e-05, "loss": 0.7884, "step": 3066, "task_loss": 1.0289536714553833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8118168711662292, "epoch": 2.59, "learning_rate": 3.1481705108078735e-05, "loss": 0.82, "step": 3067, "task_loss": 0.5601198077201843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7285646200180054, "epoch": 2.59, "learning_rate": 3.147566718995291e-05, "loss": 0.8963, "step": 3068, "task_loss": 0.9830752015113831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6709139943122864, "epoch": 2.59, "learning_rate": 3.1469629271827076e-05, "loss": 0.886, "step": 3069, "task_loss": 1.1587276458740234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.618017852306366, "epoch": 2.59, "learning_rate": 3.146359135370124e-05, "loss": 0.6264, "step": 3070, "task_loss": 0.818087100982666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8378186225891113, "epoch": 2.6, "learning_rate": 3.145755343557542e-05, "loss": 0.9971, "step": 3071, "task_loss": 1.3869907855987549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0918958187103271, "epoch": 2.6, "learning_rate": 3.1451515517449585e-05, "loss": 0.9418, "step": 3072, "task_loss": 0.6294479370117188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5628905892372131, "epoch": 2.6, "learning_rate": 3.144547759932375e-05, "loss": 0.8751, "step": 3073, "task_loss": 0.973656177520752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5180924534797668, "epoch": 2.6, "learning_rate": 3.1439439681197926e-05, "loss": 0.7902, "step": 3074, "task_loss": 0.3605163097381592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7125868201255798, "epoch": 2.6, "learning_rate": 3.143340176307209e-05, "loss": 0.8437, "step": 3075, "task_loss": 0.8415375351905823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6082790493965149, "epoch": 2.6, "learning_rate": 3.142736384494626e-05, "loss": 0.7835, "step": 3076, "task_loss": 1.2885019779205322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38996610045433044, "epoch": 2.6, "learning_rate": 3.1421325926820434e-05, "loss": 0.7277, "step": 3077, "task_loss": 1.1668273210525513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6524943709373474, "epoch": 2.6, "learning_rate": 3.14152880086946e-05, "loss": 0.6903, "step": 3078, "task_loss": 0.7020797729492188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7283225059509277, "epoch": 2.6, "learning_rate": 3.1409250090568775e-05, "loss": 0.7024, "step": 3079, "task_loss": 1.188352108001709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7896183729171753, "epoch": 2.6, "learning_rate": 3.140321217244294e-05, "loss": 0.9581, "step": 3080, "task_loss": 0.18475015461444855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9431156516075134, "epoch": 2.6, "learning_rate": 3.1397174254317116e-05, "loss": 0.7703, "step": 3081, "task_loss": 1.1637721061706543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8105146884918213, "epoch": 2.6, "learning_rate": 3.1391136336191283e-05, "loss": 0.7755, "step": 3082, "task_loss": 0.6414182186126709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6288378238677979, "epoch": 2.61, "learning_rate": 3.138509841806545e-05, "loss": 0.7791, "step": 3083, "task_loss": 0.66705322265625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7495559453964233, "epoch": 2.61, "learning_rate": 3.1379060499939625e-05, "loss": 0.6502, "step": 3084, "task_loss": 1.3439288139343262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8147602081298828, "epoch": 2.61, "learning_rate": 3.137302258181379e-05, "loss": 0.8291, "step": 3085, "task_loss": 0.8545053601264954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0447710752487183, "epoch": 2.61, "learning_rate": 3.136698466368796e-05, "loss": 0.7472, "step": 3086, "task_loss": 0.6808056831359863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38031962513923645, "epoch": 2.61, "learning_rate": 3.136094674556213e-05, "loss": 0.6652, "step": 3087, "task_loss": 0.2978646159172058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5546494722366333, "epoch": 2.61, "learning_rate": 3.13549088274363e-05, "loss": 0.8103, "step": 3088, "task_loss": 0.8973349928855896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1533368825912476, "epoch": 2.61, "learning_rate": 3.1348870909310474e-05, "loss": 0.8295, "step": 3089, "task_loss": 0.9379809498786926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5743615627288818, "epoch": 2.61, "learning_rate": 3.134283299118464e-05, "loss": 0.7668, "step": 3090, "task_loss": 0.7315385341644287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8335127830505371, "epoch": 2.61, "learning_rate": 3.133679507305881e-05, "loss": 0.6969, "step": 3091, "task_loss": 0.3726682662963867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.624243974685669, "epoch": 2.61, "learning_rate": 3.133075715493298e-05, "loss": 0.6344, "step": 3092, "task_loss": 0.9143445491790771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0778112411499023, "epoch": 2.61, "learning_rate": 3.132471923680715e-05, "loss": 0.8574, "step": 3093, "task_loss": 1.0519269704818726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1987361907958984, "epoch": 2.61, "learning_rate": 3.131868131868132e-05, "loss": 0.8806, "step": 3094, "task_loss": 1.0485796928405762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6606401801109314, "epoch": 2.62, "learning_rate": 3.131264340055549e-05, "loss": 0.7936, "step": 3095, "task_loss": 0.9130484461784363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4712564945220947, "epoch": 2.62, "learning_rate": 3.130660548242966e-05, "loss": 0.6055, "step": 3096, "task_loss": 0.4532923698425293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.084006667137146, "epoch": 2.62, "learning_rate": 3.130056756430383e-05, "loss": 0.757, "step": 3097, "task_loss": 0.8979833722114563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5892452597618103, "epoch": 2.62, "learning_rate": 3.1294529646178e-05, "loss": 0.7548, "step": 3098, "task_loss": 1.4288254976272583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7405765056610107, "epoch": 2.62, "learning_rate": 3.128849172805217e-05, "loss": 0.7866, "step": 3099, "task_loss": 0.9639761447906494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.563779890537262, "epoch": 2.62, "learning_rate": 3.128245380992634e-05, "loss": 0.7741, "step": 3100, "task_loss": 0.5200707316398621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2534862756729126, "epoch": 2.62, "learning_rate": 3.127641589180051e-05, "loss": 0.7147, "step": 3101, "task_loss": 2.0381574630737305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7990745306015015, "epoch": 2.62, "learning_rate": 3.127037797367468e-05, "loss": 0.7792, "step": 3102, "task_loss": 1.1806010007858276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3845534920692444, "epoch": 2.62, "learning_rate": 3.126434005554885e-05, "loss": 0.6327, "step": 3103, "task_loss": 0.2133253514766693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.693825364112854, "epoch": 2.62, "learning_rate": 3.1258302137423016e-05, "loss": 0.7545, "step": 3104, "task_loss": 0.6199763417243958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5323759317398071, "epoch": 2.62, "learning_rate": 3.125226421929719e-05, "loss": 0.7527, "step": 3105, "task_loss": 0.2206573635339737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4812619984149933, "epoch": 2.63, "learning_rate": 3.124622630117136e-05, "loss": 0.7548, "step": 3106, "task_loss": 0.27425816655158997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.792110025882721, "epoch": 2.63, "learning_rate": 3.1240188383045524e-05, "loss": 0.9064, "step": 3107, "task_loss": 0.28309768438339233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7624838352203369, "epoch": 2.63, "learning_rate": 3.12341504649197e-05, "loss": 0.7936, "step": 3108, "task_loss": 0.527056097984314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6111307144165039, "epoch": 2.63, "learning_rate": 3.122811254679387e-05, "loss": 0.8503, "step": 3109, "task_loss": 1.0702322721481323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5091174840927124, "epoch": 2.63, "learning_rate": 3.122207462866803e-05, "loss": 0.6543, "step": 3110, "task_loss": 0.42716890573501587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4714750647544861, "epoch": 2.63, "learning_rate": 3.1216036710542206e-05, "loss": 0.6903, "step": 3111, "task_loss": 1.036287784576416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8974889516830444, "epoch": 2.63, "learning_rate": 3.120999879241638e-05, "loss": 0.837, "step": 3112, "task_loss": 1.0372660160064697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5923638343811035, "epoch": 2.63, "learning_rate": 3.120396087429055e-05, "loss": 0.613, "step": 3113, "task_loss": 0.47694918513298035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.103044033050537, "epoch": 2.63, "learning_rate": 3.1197922956164715e-05, "loss": 0.832, "step": 3114, "task_loss": 2.234790086746216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.5502097606658936, "epoch": 2.63, "learning_rate": 3.119188503803889e-05, "loss": 0.8846, "step": 3115, "task_loss": 0.8045153021812439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7336527109146118, "epoch": 2.63, "learning_rate": 3.1185847119913056e-05, "loss": 0.7745, "step": 3116, "task_loss": 0.747377336025238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6990717053413391, "epoch": 2.63, "learning_rate": 3.117980920178722e-05, "loss": 0.7442, "step": 3117, "task_loss": 1.1197246313095093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4856246709823608, "epoch": 2.64, "learning_rate": 3.11737712836614e-05, "loss": 1.1407, "step": 3118, "task_loss": 2.2340946197509766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5579985976219177, "epoch": 2.64, "learning_rate": 3.1167733365535564e-05, "loss": 0.7828, "step": 3119, "task_loss": 0.27241620421409607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5260222554206848, "epoch": 2.64, "learning_rate": 3.116169544740973e-05, "loss": 0.7017, "step": 3120, "task_loss": 0.06501312553882599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5153244137763977, "epoch": 2.64, "learning_rate": 3.1155657529283905e-05, "loss": 0.6811, "step": 3121, "task_loss": 0.1749415397644043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6429804563522339, "epoch": 2.64, "learning_rate": 3.114961961115808e-05, "loss": 0.8026, "step": 3122, "task_loss": 0.09109488129615784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7604629993438721, "epoch": 2.64, "learning_rate": 3.114358169303224e-05, "loss": 0.749, "step": 3123, "task_loss": 0.7961565256118774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6233325004577637, "epoch": 2.64, "learning_rate": 3.1137543774906414e-05, "loss": 0.7718, "step": 3124, "task_loss": 0.48125159740448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2885315418243408, "epoch": 2.64, "learning_rate": 3.113150585678059e-05, "loss": 0.936, "step": 3125, "task_loss": 1.109978437423706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3276715576648712, "epoch": 2.64, "learning_rate": 3.112546793865475e-05, "loss": 0.8305, "step": 3126, "task_loss": 0.11410202831029892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36628955602645874, "epoch": 2.64, "learning_rate": 3.111943002052892e-05, "loss": 0.69, "step": 3127, "task_loss": 0.5616279244422913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8504805564880371, "epoch": 2.64, "learning_rate": 3.1113392102403096e-05, "loss": 0.8499, "step": 3128, "task_loss": 0.8784685730934143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3713712692260742, "epoch": 2.64, "learning_rate": 3.110735418427726e-05, "loss": 0.8338, "step": 3129, "task_loss": 0.27613407373428345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8702806234359741, "epoch": 2.65, "learning_rate": 3.110131626615143e-05, "loss": 0.6871, "step": 3130, "task_loss": 1.1232000589370728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7637176513671875, "epoch": 2.65, "learning_rate": 3.1095278348025604e-05, "loss": 0.7968, "step": 3131, "task_loss": 0.7490792274475098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6001620888710022, "epoch": 2.65, "learning_rate": 3.108924042989977e-05, "loss": 0.9106, "step": 3132, "task_loss": 0.17001810669898987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0916117429733276, "epoch": 2.65, "learning_rate": 3.108320251177394e-05, "loss": 0.7219, "step": 3133, "task_loss": 0.7988651990890503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6563551425933838, "epoch": 2.65, "learning_rate": 3.107716459364811e-05, "loss": 0.9079, "step": 3134, "task_loss": 1.3962843418121338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8960903286933899, "epoch": 2.65, "learning_rate": 3.107112667552228e-05, "loss": 0.9444, "step": 3135, "task_loss": 1.1886022090911865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6900730133056641, "epoch": 2.65, "learning_rate": 3.106508875739645e-05, "loss": 0.7975, "step": 3136, "task_loss": 0.6726140975952148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.432685911655426, "epoch": 2.65, "learning_rate": 3.105905083927062e-05, "loss": 0.5401, "step": 3137, "task_loss": 0.6507964134216309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.711495041847229, "epoch": 2.65, "learning_rate": 3.1053012921144795e-05, "loss": 1.0285, "step": 3138, "task_loss": 1.0023207664489746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.820142388343811, "epoch": 2.65, "learning_rate": 3.104697500301896e-05, "loss": 0.8467, "step": 3139, "task_loss": 1.6331762075424194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2314832210540771, "epoch": 2.65, "learning_rate": 3.104093708489313e-05, "loss": 0.8991, "step": 3140, "task_loss": 1.2430920600891113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.758161187171936, "epoch": 2.65, "learning_rate": 3.10348991667673e-05, "loss": 0.794, "step": 3141, "task_loss": 1.0795671939849854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4688737392425537, "epoch": 2.66, "learning_rate": 3.102886124864147e-05, "loss": 0.9038, "step": 3142, "task_loss": 0.23454007506370544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8360175490379333, "epoch": 2.66, "learning_rate": 3.102282333051564e-05, "loss": 0.8185, "step": 3143, "task_loss": 1.3201662302017212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5686139464378357, "epoch": 2.66, "learning_rate": 3.101678541238981e-05, "loss": 0.7297, "step": 3144, "task_loss": 0.3480425179004669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8402721285820007, "epoch": 2.66, "learning_rate": 3.101074749426398e-05, "loss": 0.7674, "step": 3145, "task_loss": 0.8077020645141602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5086648464202881, "epoch": 2.66, "learning_rate": 3.1004709576138146e-05, "loss": 0.7615, "step": 3146, "task_loss": 1.242014765739441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1624759435653687, "epoch": 2.66, "learning_rate": 3.099867165801232e-05, "loss": 0.8184, "step": 3147, "task_loss": 1.2872920036315918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4037091135978699, "epoch": 2.66, "learning_rate": 3.099263373988649e-05, "loss": 0.7922, "step": 3148, "task_loss": 0.31010016798973083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0317448377609253, "epoch": 2.66, "learning_rate": 3.098659582176066e-05, "loss": 0.8005, "step": 3149, "task_loss": 0.6081992983818054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5967278480529785, "epoch": 2.66, "learning_rate": 3.098055790363483e-05, "loss": 0.6801, "step": 3150, "task_loss": 0.9046477675437927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7411437630653381, "epoch": 2.66, "learning_rate": 3.0974519985508995e-05, "loss": 0.9658, "step": 3151, "task_loss": 0.948363721370697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0255472660064697, "epoch": 2.66, "learning_rate": 3.096848206738317e-05, "loss": 0.7736, "step": 3152, "task_loss": 0.9569589495658875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6174736618995667, "epoch": 2.66, "learning_rate": 3.0962444149257336e-05, "loss": 0.6975, "step": 3153, "task_loss": 0.7506214380264282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0273759365081787, "epoch": 2.67, "learning_rate": 3.095640623113151e-05, "loss": 0.7917, "step": 3154, "task_loss": 0.5526015758514404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8444082736968994, "epoch": 2.67, "learning_rate": 3.095036831300568e-05, "loss": 0.8802, "step": 3155, "task_loss": 0.8181344270706177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.035127878189087, "epoch": 2.67, "learning_rate": 3.0944330394879845e-05, "loss": 1.0111, "step": 3156, "task_loss": 0.9723500609397888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6508625745773315, "epoch": 2.67, "learning_rate": 3.093829247675402e-05, "loss": 0.6386, "step": 3157, "task_loss": 0.2908296585083008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8350207209587097, "epoch": 2.67, "learning_rate": 3.0932254558628186e-05, "loss": 1.164, "step": 3158, "task_loss": 1.3280986547470093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0200244188308716, "epoch": 2.67, "learning_rate": 3.092621664050236e-05, "loss": 0.8382, "step": 3159, "task_loss": 1.6663730144500732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.043365478515625, "epoch": 2.67, "learning_rate": 3.092017872237653e-05, "loss": 0.8781, "step": 3160, "task_loss": 0.9484541416168213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9852643013000488, "epoch": 2.67, "learning_rate": 3.0914140804250694e-05, "loss": 0.7058, "step": 3161, "task_loss": 1.019930362701416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.070119857788086, "epoch": 2.67, "learning_rate": 3.090810288612487e-05, "loss": 0.7159, "step": 3162, "task_loss": 1.010020136833191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8089014887809753, "epoch": 2.67, "learning_rate": 3.0902064967999035e-05, "loss": 0.554, "step": 3163, "task_loss": 1.0365593433380127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9561063647270203, "epoch": 2.67, "learning_rate": 3.08960270498732e-05, "loss": 0.7181, "step": 3164, "task_loss": 1.5164334774017334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6077285408973694, "epoch": 2.67, "learning_rate": 3.0889989131747376e-05, "loss": 0.5554, "step": 3165, "task_loss": 1.8480230569839478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0509960651397705, "epoch": 2.68, "learning_rate": 3.0883951213621544e-05, "loss": 0.7112, "step": 3166, "task_loss": 1.5490612983703613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0542272329330444, "epoch": 2.68, "learning_rate": 3.087791329549571e-05, "loss": 0.7934, "step": 3167, "task_loss": 1.0934957265853882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5000607967376709, "epoch": 2.68, "learning_rate": 3.0871875377369885e-05, "loss": 0.7991, "step": 3168, "task_loss": 0.08335362374782562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7116163969039917, "epoch": 2.68, "learning_rate": 3.086583745924406e-05, "loss": 0.8771, "step": 3169, "task_loss": 0.7051525712013245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.104564905166626, "epoch": 2.68, "learning_rate": 3.0859799541118226e-05, "loss": 0.8047, "step": 3170, "task_loss": 1.144094705581665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7311936616897583, "epoch": 2.68, "learning_rate": 3.085376162299239e-05, "loss": 0.7885, "step": 3171, "task_loss": 0.7517529129981995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1027672290802002, "epoch": 2.68, "learning_rate": 3.084772370486657e-05, "loss": 0.9567, "step": 3172, "task_loss": 1.1185749769210815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4500886797904968, "epoch": 2.68, "learning_rate": 3.0841685786740734e-05, "loss": 0.8539, "step": 3173, "task_loss": 0.30752894282341003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.459417462348938, "epoch": 2.68, "learning_rate": 3.08356478686149e-05, "loss": 0.7742, "step": 3174, "task_loss": 1.277453064918518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6754051446914673, "epoch": 2.68, "learning_rate": 3.0829609950489075e-05, "loss": 0.6208, "step": 3175, "task_loss": 1.224611520767212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34234631061553955, "epoch": 2.68, "learning_rate": 3.082357203236324e-05, "loss": 0.6069, "step": 3176, "task_loss": 0.8445130586624146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31016695499420166, "epoch": 2.69, "learning_rate": 3.081753411423741e-05, "loss": 0.7175, "step": 3177, "task_loss": 0.760482668876648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8396310806274414, "epoch": 2.69, "learning_rate": 3.0811496196111584e-05, "loss": 0.8082, "step": 3178, "task_loss": 1.9849838018417358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7979298830032349, "epoch": 2.69, "learning_rate": 3.080545827798575e-05, "loss": 0.7563, "step": 3179, "task_loss": 1.4667794704437256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6752405166625977, "epoch": 2.69, "learning_rate": 3.079942035985992e-05, "loss": 0.6403, "step": 3180, "task_loss": 0.5522093176841736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6803416013717651, "epoch": 2.69, "learning_rate": 3.079338244173409e-05, "loss": 0.8373, "step": 3181, "task_loss": 0.7312726974487305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5461660027503967, "epoch": 2.69, "learning_rate": 3.0787344523608266e-05, "loss": 0.6832, "step": 3182, "task_loss": 0.25080814957618713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5495463609695435, "epoch": 2.69, "learning_rate": 3.0781306605482426e-05, "loss": 0.5325, "step": 3183, "task_loss": 0.34389790892601013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7502367496490479, "epoch": 2.69, "learning_rate": 3.07752686873566e-05, "loss": 0.7234, "step": 3184, "task_loss": 0.412925660610199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9324972033500671, "epoch": 2.69, "learning_rate": 3.0769230769230774e-05, "loss": 0.7835, "step": 3185, "task_loss": 0.6428356170654297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5306106805801392, "epoch": 2.69, "learning_rate": 3.076319285110494e-05, "loss": 0.5332, "step": 3186, "task_loss": 0.39461106061935425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8705208897590637, "epoch": 2.69, "learning_rate": 3.075715493297911e-05, "loss": 0.8455, "step": 3187, "task_loss": 0.6035874485969543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9159365296363831, "epoch": 2.69, "learning_rate": 3.075111701485328e-05, "loss": 0.8895, "step": 3188, "task_loss": 1.9215211868286133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6719887256622314, "epoch": 2.7, "learning_rate": 3.074507909672745e-05, "loss": 0.6852, "step": 3189, "task_loss": 0.3079150915145874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9369650483131409, "epoch": 2.7, "learning_rate": 3.073904117860162e-05, "loss": 0.7867, "step": 3190, "task_loss": 0.6321751475334167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3319016695022583, "epoch": 2.7, "learning_rate": 3.073300326047579e-05, "loss": 0.5293, "step": 3191, "task_loss": 0.38906458020210266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4013788402080536, "epoch": 2.7, "learning_rate": 3.072696534234996e-05, "loss": 0.7443, "step": 3192, "task_loss": 0.6293163299560547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6085204482078552, "epoch": 2.7, "learning_rate": 3.0720927424224125e-05, "loss": 0.8517, "step": 3193, "task_loss": 1.3240139484405518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5559515953063965, "epoch": 2.7, "learning_rate": 3.07148895060983e-05, "loss": 0.6212, "step": 3194, "task_loss": 0.6982637047767639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8499322533607483, "epoch": 2.7, "learning_rate": 3.070885158797247e-05, "loss": 0.6071, "step": 3195, "task_loss": 0.9296495318412781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6830109357833862, "epoch": 2.7, "learning_rate": 3.0702813669846634e-05, "loss": 0.6985, "step": 3196, "task_loss": 0.625149130821228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6141483187675476, "epoch": 2.7, "learning_rate": 3.069677575172081e-05, "loss": 0.6839, "step": 3197, "task_loss": 0.5352165102958679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6265395283699036, "epoch": 2.7, "learning_rate": 3.069073783359498e-05, "loss": 0.6138, "step": 3198, "task_loss": 0.4455566108226776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.591829776763916, "epoch": 2.7, "learning_rate": 3.068469991546914e-05, "loss": 0.7552, "step": 3199, "task_loss": 0.6086850762367249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8524900078773499, "epoch": 2.7, "learning_rate": 3.0678661997343316e-05, "loss": 0.7744, "step": 3200, "task_loss": 1.5730679035186768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6828543543815613, "epoch": 2.71, "learning_rate": 3.067262407921749e-05, "loss": 0.6728, "step": 3201, "task_loss": 1.2370249032974243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8591368198394775, "epoch": 2.71, "learning_rate": 3.066658616109166e-05, "loss": 0.7115, "step": 3202, "task_loss": 1.4544029235839844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5472726821899414, "epoch": 2.71, "learning_rate": 3.0660548242965824e-05, "loss": 0.6814, "step": 3203, "task_loss": 0.6061931848526001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.664957582950592, "epoch": 2.71, "learning_rate": 3.065451032484e-05, "loss": 0.5888, "step": 3204, "task_loss": 0.6862332820892334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7381258010864258, "epoch": 2.71, "learning_rate": 3.0648472406714165e-05, "loss": 0.6844, "step": 3205, "task_loss": 0.5976237654685974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8357290029525757, "epoch": 2.71, "learning_rate": 3.064243448858833e-05, "loss": 0.7789, "step": 3206, "task_loss": 1.7306406497955322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1574708223342896, "epoch": 2.71, "learning_rate": 3.0636396570462507e-05, "loss": 0.8458, "step": 3207, "task_loss": 0.46113723516464233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42682120203971863, "epoch": 2.71, "learning_rate": 3.0630358652336674e-05, "loss": 0.7908, "step": 3208, "task_loss": 0.44245457649230957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8901224136352539, "epoch": 2.71, "learning_rate": 3.062432073421084e-05, "loss": 0.9076, "step": 3209, "task_loss": 1.9688218832015991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.490556538105011, "epoch": 2.71, "learning_rate": 3.0618282816085015e-05, "loss": 0.5647, "step": 3210, "task_loss": 0.09024570137262344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8634461164474487, "epoch": 2.71, "learning_rate": 3.061224489795919e-05, "loss": 0.7147, "step": 3211, "task_loss": 0.4070153534412384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9406939744949341, "epoch": 2.71, "learning_rate": 3.0606206979833356e-05, "loss": 0.8057, "step": 3212, "task_loss": 0.8827047348022461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6856909394264221, "epoch": 2.72, "learning_rate": 3.060016906170752e-05, "loss": 0.5144, "step": 3213, "task_loss": 0.2870550751686096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5347412824630737, "epoch": 2.72, "learning_rate": 3.05941311435817e-05, "loss": 0.7348, "step": 3214, "task_loss": 0.6001242399215698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8324074745178223, "epoch": 2.72, "learning_rate": 3.0588093225455864e-05, "loss": 0.7328, "step": 3215, "task_loss": 0.47560784220695496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7889832258224487, "epoch": 2.72, "learning_rate": 3.058205530733003e-05, "loss": 0.8431, "step": 3216, "task_loss": 0.6564210057258606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0889413356781006, "epoch": 2.72, "learning_rate": 3.0576017389204206e-05, "loss": 0.7747, "step": 3217, "task_loss": 0.5277644991874695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5270012617111206, "epoch": 2.72, "learning_rate": 3.056997947107837e-05, "loss": 0.8013, "step": 3218, "task_loss": 0.5058249235153198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6184274554252625, "epoch": 2.72, "learning_rate": 3.056394155295254e-05, "loss": 0.7006, "step": 3219, "task_loss": 0.2727234959602356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6865006685256958, "epoch": 2.72, "learning_rate": 3.0557903634826714e-05, "loss": 0.6968, "step": 3220, "task_loss": 0.3609784245491028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5488929748535156, "epoch": 2.72, "learning_rate": 3.055186571670088e-05, "loss": 0.6205, "step": 3221, "task_loss": 0.632896900177002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5753695964813232, "epoch": 2.72, "learning_rate": 3.0545827798575055e-05, "loss": 0.5562, "step": 3222, "task_loss": 1.0328339338302612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9624415040016174, "epoch": 2.72, "learning_rate": 3.053978988044922e-05, "loss": 0.8698, "step": 3223, "task_loss": 1.5791884660720825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.08939790725708, "epoch": 2.72, "learning_rate": 3.053375196232339e-05, "loss": 1.0014, "step": 3224, "task_loss": 1.1648470163345337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0841717720031738, "epoch": 2.73, "learning_rate": 3.052771404419756e-05, "loss": 0.8551, "step": 3225, "task_loss": 0.5503373146057129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9547292590141296, "epoch": 2.73, "learning_rate": 3.052167612607173e-05, "loss": 0.9845, "step": 3226, "task_loss": 1.687423825263977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.743256688117981, "epoch": 2.73, "learning_rate": 3.05156382079459e-05, "loss": 0.849, "step": 3227, "task_loss": 0.6106722354888916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6666925549507141, "epoch": 2.73, "learning_rate": 3.050960028982007e-05, "loss": 0.8242, "step": 3228, "task_loss": 0.9376764297485352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6793197393417358, "epoch": 2.73, "learning_rate": 3.050356237169424e-05, "loss": 0.877, "step": 3229, "task_loss": 0.5538238286972046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.923708438873291, "epoch": 2.73, "learning_rate": 3.049752445356841e-05, "loss": 1.0121, "step": 3230, "task_loss": 1.5856703519821167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8031929135322571, "epoch": 2.73, "learning_rate": 3.049148653544258e-05, "loss": 0.7474, "step": 3231, "task_loss": 1.0973801612854004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6770730018615723, "epoch": 2.73, "learning_rate": 3.0485448617316754e-05, "loss": 0.646, "step": 3232, "task_loss": 0.7020218372344971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3898873031139374, "epoch": 2.73, "learning_rate": 3.0479410699190918e-05, "loss": 0.507, "step": 3233, "task_loss": 0.388008713722229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.958076000213623, "epoch": 2.73, "learning_rate": 3.047337278106509e-05, "loss": 0.7033, "step": 3234, "task_loss": 0.3326040804386139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5524904727935791, "epoch": 2.73, "learning_rate": 3.0467334862939262e-05, "loss": 0.6663, "step": 3235, "task_loss": 0.5325053334236145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5622840523719788, "epoch": 2.73, "learning_rate": 3.046129694481343e-05, "loss": 0.7751, "step": 3236, "task_loss": 0.34747156500816345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6312466859817505, "epoch": 2.74, "learning_rate": 3.04552590266876e-05, "loss": 0.6095, "step": 3237, "task_loss": 0.14888127148151398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6224997639656067, "epoch": 2.74, "learning_rate": 3.044922110856177e-05, "loss": 0.7414, "step": 3238, "task_loss": 0.4874679744243622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29986628890037537, "epoch": 2.74, "learning_rate": 3.0443183190435938e-05, "loss": 0.6452, "step": 3239, "task_loss": 0.31125733256340027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4303237795829773, "epoch": 2.74, "learning_rate": 3.043714527231011e-05, "loss": 0.624, "step": 3240, "task_loss": 1.8085837364196777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9526863098144531, "epoch": 2.74, "learning_rate": 3.043110735418428e-05, "loss": 0.7177, "step": 3241, "task_loss": 0.744300901889801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8251503705978394, "epoch": 2.74, "learning_rate": 3.042506943605845e-05, "loss": 0.667, "step": 3242, "task_loss": 0.38434773683547974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4565238356590271, "epoch": 2.74, "learning_rate": 3.0419031517932617e-05, "loss": 0.6573, "step": 3243, "task_loss": 0.513244092464447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.086967945098877, "epoch": 2.74, "learning_rate": 3.0412993599806787e-05, "loss": 1.0609, "step": 3244, "task_loss": 0.7730523943901062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4086042642593384, "epoch": 2.74, "learning_rate": 3.040695568168096e-05, "loss": 0.8459, "step": 3245, "task_loss": 1.009049415588379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.665966808795929, "epoch": 2.74, "learning_rate": 3.0400917763555125e-05, "loss": 0.7695, "step": 3246, "task_loss": 1.1233670711517334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.559831976890564, "epoch": 2.74, "learning_rate": 3.0394879845429296e-05, "loss": 0.6049, "step": 3247, "task_loss": 0.3660418391227722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6235067844390869, "epoch": 2.75, "learning_rate": 3.038884192730347e-05, "loss": 0.8961, "step": 3248, "task_loss": 0.5754123330116272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5849120020866394, "epoch": 2.75, "learning_rate": 3.0382804009177633e-05, "loss": 0.707, "step": 3249, "task_loss": 0.5135920643806458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6178340315818787, "epoch": 2.75, "learning_rate": 3.0376766091051807e-05, "loss": 0.6944, "step": 3250, "task_loss": 0.595054566860199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8201622366905212, "epoch": 2.75, "learning_rate": 3.0370728172925978e-05, "loss": 0.7424, "step": 3251, "task_loss": 0.5152836441993713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9982415437698364, "epoch": 2.75, "learning_rate": 3.036469025480015e-05, "loss": 0.7612, "step": 3252, "task_loss": 1.0326942205429077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2640788555145264, "epoch": 2.75, "learning_rate": 3.0358652336674316e-05, "loss": 0.9939, "step": 3253, "task_loss": 1.0172697305679321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0437321662902832, "epoch": 2.75, "learning_rate": 3.0352614418548486e-05, "loss": 0.9208, "step": 3254, "task_loss": 1.6995952129364014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4299675226211548, "epoch": 2.75, "learning_rate": 3.0346576500422657e-05, "loss": 0.9805, "step": 3255, "task_loss": 0.7143516540527344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5238592624664307, "epoch": 2.75, "learning_rate": 3.0340538582296824e-05, "loss": 0.591, "step": 3256, "task_loss": 1.1079025268554688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6339019536972046, "epoch": 2.75, "learning_rate": 3.0334500664170994e-05, "loss": 0.6887, "step": 3257, "task_loss": 0.8352885842323303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7218683958053589, "epoch": 2.75, "learning_rate": 3.0328462746045165e-05, "loss": 0.8389, "step": 3258, "task_loss": 0.8977619409561157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8854718208312988, "epoch": 2.75, "learning_rate": 3.0322424827919332e-05, "loss": 0.8207, "step": 3259, "task_loss": 1.7896380424499512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0721087455749512, "epoch": 2.76, "learning_rate": 3.0316386909793503e-05, "loss": 0.8341, "step": 3260, "task_loss": 0.8420252799987793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1111098527908325, "epoch": 2.76, "learning_rate": 3.0310348991667677e-05, "loss": 0.8382, "step": 3261, "task_loss": 0.5889455676078796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8799079656600952, "epoch": 2.76, "learning_rate": 3.0304311073541847e-05, "loss": 0.6934, "step": 3262, "task_loss": 1.2015488147735596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3919408917427063, "epoch": 2.76, "learning_rate": 3.029827315541601e-05, "loss": 0.6058, "step": 3263, "task_loss": 0.7438454627990723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9071213006973267, "epoch": 2.76, "learning_rate": 3.0292235237290185e-05, "loss": 0.7473, "step": 3264, "task_loss": 0.7080237865447998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3722788095474243, "epoch": 2.76, "learning_rate": 3.0286197319164356e-05, "loss": 0.7755, "step": 3265, "task_loss": 1.4974243640899658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.583774983882904, "epoch": 2.76, "learning_rate": 3.0280159401038523e-05, "loss": 0.7605, "step": 3266, "task_loss": 0.15253575146198273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6703147888183594, "epoch": 2.76, "learning_rate": 3.0274121482912693e-05, "loss": 0.756, "step": 3267, "task_loss": 0.7222764492034912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4770735502243042, "epoch": 2.76, "learning_rate": 3.0268083564786864e-05, "loss": 0.7505, "step": 3268, "task_loss": 0.9355275630950928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2244980335235596, "epoch": 2.76, "learning_rate": 3.026204564666103e-05, "loss": 0.9068, "step": 3269, "task_loss": 1.3997679948806763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7069613933563232, "epoch": 2.76, "learning_rate": 3.0256007728535202e-05, "loss": 0.7463, "step": 3270, "task_loss": 0.6206645369529724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5903657078742981, "epoch": 2.76, "learning_rate": 3.0249969810409372e-05, "loss": 0.9634, "step": 3271, "task_loss": 0.3822365403175354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6834724545478821, "epoch": 2.77, "learning_rate": 3.0243931892283546e-05, "loss": 0.825, "step": 3272, "task_loss": 0.8759979009628296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5028154253959656, "epoch": 2.77, "learning_rate": 3.023789397415771e-05, "loss": 0.5826, "step": 3273, "task_loss": 1.3599047660827637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8747584223747253, "epoch": 2.77, "learning_rate": 3.023185605603188e-05, "loss": 0.5541, "step": 3274, "task_loss": 0.46913453936576843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6725547909736633, "epoch": 2.77, "learning_rate": 3.0225818137906055e-05, "loss": 0.5366, "step": 3275, "task_loss": 0.2768903374671936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8722879886627197, "epoch": 2.77, "learning_rate": 3.021978021978022e-05, "loss": 0.654, "step": 3276, "task_loss": 0.9155100584030151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8894006013870239, "epoch": 2.77, "learning_rate": 3.0213742301654392e-05, "loss": 0.8126, "step": 3277, "task_loss": 0.2742604613304138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5366387367248535, "epoch": 2.77, "learning_rate": 3.0207704383528563e-05, "loss": 0.6444, "step": 3278, "task_loss": 0.7328145503997803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.846782922744751, "epoch": 2.77, "learning_rate": 3.0201666465402727e-05, "loss": 0.6579, "step": 3279, "task_loss": 1.3426260948181152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8142915964126587, "epoch": 2.77, "learning_rate": 3.01956285472769e-05, "loss": 0.9306, "step": 3280, "task_loss": 0.413924902677536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5528120398521423, "epoch": 2.77, "learning_rate": 3.018959062915107e-05, "loss": 0.776, "step": 3281, "task_loss": 1.9705495834350586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9322274923324585, "epoch": 2.77, "learning_rate": 3.0183552711025242e-05, "loss": 0.9211, "step": 3282, "task_loss": 2.0287837982177734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3485484719276428, "epoch": 2.77, "learning_rate": 3.017751479289941e-05, "loss": 0.7222, "step": 3283, "task_loss": 0.14161722362041473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.81611168384552, "epoch": 2.78, "learning_rate": 3.017147687477358e-05, "loss": 0.7918, "step": 3284, "task_loss": 1.099387288093567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5183975696563721, "epoch": 2.78, "learning_rate": 3.016543895664775e-05, "loss": 0.9085, "step": 3285, "task_loss": 0.8479616045951843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.965560793876648, "epoch": 2.78, "learning_rate": 3.0159401038521917e-05, "loss": 1.0197, "step": 3286, "task_loss": 0.7708216905593872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7323737144470215, "epoch": 2.78, "learning_rate": 3.0153363120396088e-05, "loss": 0.7039, "step": 3287, "task_loss": 0.989920973777771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4878042936325073, "epoch": 2.78, "learning_rate": 3.014732520227026e-05, "loss": 0.9399, "step": 3288, "task_loss": 1.4251518249511719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42194879055023193, "epoch": 2.78, "learning_rate": 3.0141287284144426e-05, "loss": 0.6529, "step": 3289, "task_loss": 0.21817432343959808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7053632736206055, "epoch": 2.78, "learning_rate": 3.0135249366018596e-05, "loss": 0.8551, "step": 3290, "task_loss": 0.44933223724365234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5540753602981567, "epoch": 2.78, "learning_rate": 3.012921144789277e-05, "loss": 0.8164, "step": 3291, "task_loss": 0.3803727328777313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8512284159660339, "epoch": 2.78, "learning_rate": 3.012317352976694e-05, "loss": 0.6673, "step": 3292, "task_loss": 0.6789261102676392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7099487781524658, "epoch": 2.78, "learning_rate": 3.0117135611641108e-05, "loss": 0.8345, "step": 3293, "task_loss": 0.5557613372802734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5668407082557678, "epoch": 2.78, "learning_rate": 3.011109769351528e-05, "loss": 0.6318, "step": 3294, "task_loss": 0.5616931915283203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5764466524124146, "epoch": 2.78, "learning_rate": 3.010505977538945e-05, "loss": 0.8859, "step": 3295, "task_loss": 0.9217730760574341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9525061845779419, "epoch": 2.79, "learning_rate": 3.0099021857263616e-05, "loss": 0.7496, "step": 3296, "task_loss": 1.0445730686187744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3469659090042114, "epoch": 2.79, "learning_rate": 3.0092983939137787e-05, "loss": 0.7823, "step": 3297, "task_loss": 1.539186954498291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7258437871932983, "epoch": 2.79, "learning_rate": 3.0086946021011957e-05, "loss": 0.721, "step": 3298, "task_loss": 0.718285322189331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4952031672000885, "epoch": 2.79, "learning_rate": 3.0080908102886125e-05, "loss": 0.7038, "step": 3299, "task_loss": 0.27497726678848267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6820345520973206, "epoch": 2.79, "learning_rate": 3.0074870184760295e-05, "loss": 0.8643, "step": 3300, "task_loss": 1.3861004114151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6025184392929077, "epoch": 2.79, "learning_rate": 3.0068832266634466e-05, "loss": 0.6824, "step": 3301, "task_loss": 0.6789702773094177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9073735475540161, "epoch": 2.79, "learning_rate": 3.006279434850864e-05, "loss": 0.8305, "step": 3302, "task_loss": 0.5434929132461548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7894983291625977, "epoch": 2.79, "learning_rate": 3.0056756430382803e-05, "loss": 1.1227, "step": 3303, "task_loss": 0.8957256078720093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8467644453048706, "epoch": 2.79, "learning_rate": 3.0050718512256974e-05, "loss": 0.8402, "step": 3304, "task_loss": 0.8982765078544617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5099388957023621, "epoch": 2.79, "learning_rate": 3.0044680594131148e-05, "loss": 0.6793, "step": 3305, "task_loss": 0.8377856016159058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5668447613716125, "epoch": 2.79, "learning_rate": 3.0038642676005312e-05, "loss": 0.6099, "step": 3306, "task_loss": 0.7080490589141846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7160319685935974, "epoch": 2.79, "learning_rate": 3.0032604757879486e-05, "loss": 0.6283, "step": 3307, "task_loss": 0.8550854921340942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.603061854839325, "epoch": 2.8, "learning_rate": 3.0026566839753656e-05, "loss": 0.8496, "step": 3308, "task_loss": 1.1990206241607666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8270069360733032, "epoch": 2.8, "learning_rate": 3.002052892162782e-05, "loss": 0.9871, "step": 3309, "task_loss": 1.043546438217163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.093401312828064, "epoch": 2.8, "learning_rate": 3.0014491003501994e-05, "loss": 0.826, "step": 3310, "task_loss": 1.8238162994384766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8919836282730103, "epoch": 2.8, "learning_rate": 3.0008453085376165e-05, "loss": 0.7791, "step": 3311, "task_loss": 1.5206384658813477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8345562219619751, "epoch": 2.8, "learning_rate": 3.0002415167250335e-05, "loss": 0.8903, "step": 3312, "task_loss": 0.46383753418922424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43804049491882324, "epoch": 2.8, "learning_rate": 2.9996377249124502e-05, "loss": 0.5437, "step": 3313, "task_loss": 0.21464987099170685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1369664669036865, "epoch": 2.8, "learning_rate": 2.9990339330998673e-05, "loss": 0.8468, "step": 3314, "task_loss": 1.3994513750076294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4454078674316406, "epoch": 2.8, "learning_rate": 2.9984301412872844e-05, "loss": 0.5578, "step": 3315, "task_loss": 0.10286688804626465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.838683009147644, "epoch": 2.8, "learning_rate": 2.997826349474701e-05, "loss": 0.7295, "step": 3316, "task_loss": 0.7089868783950806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.707889974117279, "epoch": 2.8, "learning_rate": 2.997222557662118e-05, "loss": 0.7253, "step": 3317, "task_loss": 0.9120591282844543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43396711349487305, "epoch": 2.8, "learning_rate": 2.9966187658495355e-05, "loss": 0.8796, "step": 3318, "task_loss": 0.4949934482574463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4237309694290161, "epoch": 2.81, "learning_rate": 2.996014974036952e-05, "loss": 0.6691, "step": 3319, "task_loss": 0.7241352200508118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9272547960281372, "epoch": 2.81, "learning_rate": 2.995411182224369e-05, "loss": 0.7312, "step": 3320, "task_loss": 0.7715173959732056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5984746217727661, "epoch": 2.81, "learning_rate": 2.9948073904117864e-05, "loss": 0.6947, "step": 3321, "task_loss": 0.8189025521278381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3606223165988922, "epoch": 2.81, "learning_rate": 2.9942035985992034e-05, "loss": 0.7051, "step": 3322, "task_loss": 0.08540637791156769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6124386787414551, "epoch": 2.81, "learning_rate": 2.99359980678662e-05, "loss": 0.5368, "step": 3323, "task_loss": 0.17735710740089417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6173439025878906, "epoch": 2.81, "learning_rate": 2.9929960149740372e-05, "loss": 0.7134, "step": 3324, "task_loss": 0.7662703394889832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.646454930305481, "epoch": 2.81, "learning_rate": 2.9923922231614543e-05, "loss": 0.6548, "step": 3325, "task_loss": 0.3659009337425232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7730146646499634, "epoch": 2.81, "learning_rate": 2.991788431348871e-05, "loss": 0.7001, "step": 3326, "task_loss": 0.5309557318687439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.17139969766139984, "epoch": 2.81, "learning_rate": 2.991184639536288e-05, "loss": 0.5165, "step": 3327, "task_loss": 0.4610321521759033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.901475191116333, "epoch": 2.81, "learning_rate": 2.990580847723705e-05, "loss": 0.6257, "step": 3328, "task_loss": 0.9280927777290344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3980725407600403, "epoch": 2.81, "learning_rate": 2.9899770559111218e-05, "loss": 0.5709, "step": 3329, "task_loss": 1.1071299314498901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.418002724647522, "epoch": 2.81, "learning_rate": 2.989373264098539e-05, "loss": 0.8482, "step": 3330, "task_loss": 0.20210659503936768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6358230113983154, "epoch": 2.82, "learning_rate": 2.988769472285956e-05, "loss": 0.9167, "step": 3331, "task_loss": 0.745890200138092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9441062211990356, "epoch": 2.82, "learning_rate": 2.9881656804733733e-05, "loss": 0.8165, "step": 3332, "task_loss": 0.959979772567749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9455717206001282, "epoch": 2.82, "learning_rate": 2.9875618886607897e-05, "loss": 0.8012, "step": 3333, "task_loss": 1.034719467163086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8987544775009155, "epoch": 2.82, "learning_rate": 2.986958096848207e-05, "loss": 0.6793, "step": 3334, "task_loss": 0.8512789011001587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5758401155471802, "epoch": 2.82, "learning_rate": 2.986354305035624e-05, "loss": 0.6613, "step": 3335, "task_loss": 0.9593351483345032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28677982091903687, "epoch": 2.82, "learning_rate": 2.9857505132230405e-05, "loss": 0.5497, "step": 3336, "task_loss": 0.06280370056629181 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6193314790725708, "epoch": 2.82, "learning_rate": 2.985146721410458e-05, "loss": 0.8424, "step": 3337, "task_loss": 0.4445653557777405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6852444410324097, "epoch": 2.82, "learning_rate": 2.984542929597875e-05, "loss": 1.0175, "step": 3338, "task_loss": 0.5066931843757629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5662254691123962, "epoch": 2.82, "learning_rate": 2.9839391377852917e-05, "loss": 0.721, "step": 3339, "task_loss": 0.40833961963653564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5589171648025513, "epoch": 2.82, "learning_rate": 2.9833353459727088e-05, "loss": 0.5924, "step": 3340, "task_loss": 0.5408655405044556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5563128590583801, "epoch": 2.82, "learning_rate": 2.9827315541601258e-05, "loss": 0.7494, "step": 3341, "task_loss": 0.5156218409538269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4187811613082886, "epoch": 2.82, "learning_rate": 2.982127762347543e-05, "loss": 0.8726, "step": 3342, "task_loss": 1.3007992506027222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.6975007057189941, "epoch": 2.83, "learning_rate": 2.9815239705349596e-05, "loss": 1.1042, "step": 3343, "task_loss": 1.2388087511062622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32393598556518555, "epoch": 2.83, "learning_rate": 2.9809201787223766e-05, "loss": 0.447, "step": 3344, "task_loss": 0.4461499750614166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5517587661743164, "epoch": 2.83, "learning_rate": 2.9803163869097937e-05, "loss": 0.709, "step": 3345, "task_loss": 0.42245104908943176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8855922222137451, "epoch": 2.83, "learning_rate": 2.9797125950972104e-05, "loss": 0.8355, "step": 3346, "task_loss": 0.5716240406036377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47937145829200745, "epoch": 2.83, "learning_rate": 2.9791088032846275e-05, "loss": 0.6628, "step": 3347, "task_loss": 0.4947882294654846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.928808331489563, "epoch": 2.83, "learning_rate": 2.978505011472045e-05, "loss": 0.6398, "step": 3348, "task_loss": 1.5807663202285767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47245272994041443, "epoch": 2.83, "learning_rate": 2.9779012196594612e-05, "loss": 0.791, "step": 3349, "task_loss": 1.283268928527832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6965512037277222, "epoch": 2.83, "learning_rate": 2.9772974278468786e-05, "loss": 0.5812, "step": 3350, "task_loss": 1.1770071983337402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6762098073959351, "epoch": 2.83, "learning_rate": 2.9766936360342957e-05, "loss": 0.8083, "step": 3351, "task_loss": 0.9953381419181824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7784196138381958, "epoch": 2.83, "learning_rate": 2.976089844221712e-05, "loss": 0.8165, "step": 3352, "task_loss": 1.2067924737930298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9041169881820679, "epoch": 2.83, "learning_rate": 2.9754860524091295e-05, "loss": 0.7064, "step": 3353, "task_loss": 0.554377555847168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.675803542137146, "epoch": 2.83, "learning_rate": 2.9748822605965465e-05, "loss": 0.7949, "step": 3354, "task_loss": 1.5139515399932861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2474185228347778, "epoch": 2.84, "learning_rate": 2.9742784687839636e-05, "loss": 1.051, "step": 3355, "task_loss": 1.3728817701339722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7146264910697937, "epoch": 2.84, "learning_rate": 2.9736746769713803e-05, "loss": 0.7898, "step": 3356, "task_loss": 0.6647437214851379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7725421786308289, "epoch": 2.84, "learning_rate": 2.9730708851587974e-05, "loss": 0.7098, "step": 3357, "task_loss": 0.6905233860015869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6829078197479248, "epoch": 2.84, "learning_rate": 2.9724670933462144e-05, "loss": 0.5362, "step": 3358, "task_loss": 0.6515323519706726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5686049461364746, "epoch": 2.84, "learning_rate": 2.971863301533631e-05, "loss": 0.8427, "step": 3359, "task_loss": 1.1448637247085571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7078079581260681, "epoch": 2.84, "learning_rate": 2.9712595097210482e-05, "loss": 0.7538, "step": 3360, "task_loss": 0.8823916912078857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5136501789093018, "epoch": 2.84, "learning_rate": 2.9706557179084653e-05, "loss": 0.7323, "step": 3361, "task_loss": 0.2550985813140869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5257319211959839, "epoch": 2.84, "learning_rate": 2.970051926095882e-05, "loss": 0.6241, "step": 3362, "task_loss": 0.5669220685958862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8820339441299438, "epoch": 2.84, "learning_rate": 2.969448134283299e-05, "loss": 0.862, "step": 3363, "task_loss": 0.6643784046173096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7887123823165894, "epoch": 2.84, "learning_rate": 2.9688443424707164e-05, "loss": 0.8536, "step": 3364, "task_loss": 1.103582501411438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4248363673686981, "epoch": 2.84, "learning_rate": 2.9682405506581335e-05, "loss": 0.6901, "step": 3365, "task_loss": 1.1229437589645386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8013144731521606, "epoch": 2.84, "learning_rate": 2.9676367588455502e-05, "loss": 0.5921, "step": 3366, "task_loss": 1.3697738647460938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5745146870613098, "epoch": 2.85, "learning_rate": 2.9670329670329673e-05, "loss": 0.8415, "step": 3367, "task_loss": 0.9808184504508972 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5577234029769897, "epoch": 2.85, "learning_rate": 2.9664291752203843e-05, "loss": 0.7684, "step": 3368, "task_loss": 0.9671440124511719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4190555214881897, "epoch": 2.85, "learning_rate": 2.965825383407801e-05, "loss": 0.699, "step": 3369, "task_loss": 0.09167198836803436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8989880084991455, "epoch": 2.85, "learning_rate": 2.965221591595218e-05, "loss": 0.8081, "step": 3370, "task_loss": 1.1267329454421997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7191668152809143, "epoch": 2.85, "learning_rate": 2.964617799782635e-05, "loss": 0.712, "step": 3371, "task_loss": 0.8082057237625122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0991194248199463, "epoch": 2.85, "learning_rate": 2.964014007970052e-05, "loss": 0.9959, "step": 3372, "task_loss": 0.6798328757286072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5841532349586487, "epoch": 2.85, "learning_rate": 2.963410216157469e-05, "loss": 0.4368, "step": 3373, "task_loss": 0.3852476477622986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4525078237056732, "epoch": 2.85, "learning_rate": 2.962806424344886e-05, "loss": 0.738, "step": 3374, "task_loss": 0.6469378471374512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0774712562561035, "epoch": 2.85, "learning_rate": 2.9622026325323034e-05, "loss": 0.695, "step": 3375, "task_loss": 0.8054196834564209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8607787489891052, "epoch": 2.85, "learning_rate": 2.9615988407197198e-05, "loss": 0.7308, "step": 3376, "task_loss": 1.0702584981918335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9457405805587769, "epoch": 2.85, "learning_rate": 2.9609950489071368e-05, "loss": 0.7783, "step": 3377, "task_loss": 0.7950518131256104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5823343396186829, "epoch": 2.85, "learning_rate": 2.9603912570945542e-05, "loss": 0.5022, "step": 3378, "task_loss": 0.6022517085075378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6101597547531128, "epoch": 2.86, "learning_rate": 2.9597874652819706e-05, "loss": 0.9097, "step": 3379, "task_loss": 1.0370609760284424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.346673846244812, "epoch": 2.86, "learning_rate": 2.959183673469388e-05, "loss": 0.7723, "step": 3380, "task_loss": 0.6194068789482117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.477618545293808, "epoch": 2.86, "learning_rate": 2.958579881656805e-05, "loss": 0.6663, "step": 3381, "task_loss": 0.24159900844097137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9971652626991272, "epoch": 2.86, "learning_rate": 2.9579760898442214e-05, "loss": 0.8827, "step": 3382, "task_loss": 1.0553619861602783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3792429268360138, "epoch": 2.86, "learning_rate": 2.9573722980316388e-05, "loss": 0.7844, "step": 3383, "task_loss": 0.3806905150413513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.310664415359497, "epoch": 2.86, "learning_rate": 2.956768506219056e-05, "loss": 0.993, "step": 3384, "task_loss": 0.9180289506912231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3871214985847473, "epoch": 2.86, "learning_rate": 2.956164714406473e-05, "loss": 0.8203, "step": 3385, "task_loss": 0.7736110687255859 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8282737135887146, "epoch": 2.86, "learning_rate": 2.9555609225938897e-05, "loss": 0.9284, "step": 3386, "task_loss": 0.7874165177345276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1021589040756226, "epoch": 2.86, "learning_rate": 2.9549571307813067e-05, "loss": 0.7402, "step": 3387, "task_loss": 1.7246118783950806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6346914172172546, "epoch": 2.86, "learning_rate": 2.9543533389687238e-05, "loss": 1.0035, "step": 3388, "task_loss": 0.7719483971595764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8377894759178162, "epoch": 2.86, "learning_rate": 2.9537495471561405e-05, "loss": 0.5944, "step": 3389, "task_loss": 0.6319814324378967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4818529784679413, "epoch": 2.87, "learning_rate": 2.9531457553435575e-05, "loss": 0.6585, "step": 3390, "task_loss": 0.8902625441551208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5993454456329346, "epoch": 2.87, "learning_rate": 2.952541963530975e-05, "loss": 0.5803, "step": 3391, "task_loss": 0.7087879180908203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7439875602722168, "epoch": 2.87, "learning_rate": 2.9519381717183913e-05, "loss": 0.8167, "step": 3392, "task_loss": 0.8190661668777466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8822708129882812, "epoch": 2.87, "learning_rate": 2.9513343799058084e-05, "loss": 0.8505, "step": 3393, "task_loss": 0.9617863893508911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8747755885124207, "epoch": 2.87, "learning_rate": 2.9507305880932258e-05, "loss": 0.6069, "step": 3394, "task_loss": 0.4685360789299011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4997193217277527, "epoch": 2.87, "learning_rate": 2.9501267962806428e-05, "loss": 0.788, "step": 3395, "task_loss": 0.5313251614570618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5267899036407471, "epoch": 2.87, "learning_rate": 2.9495230044680595e-05, "loss": 0.6196, "step": 3396, "task_loss": 0.3576836884021759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6000368595123291, "epoch": 2.87, "learning_rate": 2.9489192126554766e-05, "loss": 0.6254, "step": 3397, "task_loss": 0.25771230459213257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4994317889213562, "epoch": 2.87, "learning_rate": 2.9483154208428937e-05, "loss": 0.6603, "step": 3398, "task_loss": 0.467429518699646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5789288282394409, "epoch": 2.87, "learning_rate": 2.9477116290303104e-05, "loss": 0.8261, "step": 3399, "task_loss": 0.9145685434341431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6369884610176086, "epoch": 2.87, "learning_rate": 2.9471078372177274e-05, "loss": 0.716, "step": 3400, "task_loss": 0.8176102638244629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5472508668899536, "epoch": 2.87, "learning_rate": 2.9465040454051445e-05, "loss": 0.5094, "step": 3401, "task_loss": 0.6450841426849365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8643209338188171, "epoch": 2.88, "learning_rate": 2.9459002535925612e-05, "loss": 0.7853, "step": 3402, "task_loss": 0.9234936237335205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6933404803276062, "epoch": 2.88, "learning_rate": 2.9452964617799783e-05, "loss": 0.7104, "step": 3403, "task_loss": 0.9083490967750549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7489376068115234, "epoch": 2.88, "learning_rate": 2.9446926699673953e-05, "loss": 0.8399, "step": 3404, "task_loss": 1.4088902473449707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6857874393463135, "epoch": 2.88, "learning_rate": 2.9440888781548127e-05, "loss": 0.8249, "step": 3405, "task_loss": 0.3899174928665161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5934022665023804, "epoch": 2.88, "learning_rate": 2.943485086342229e-05, "loss": 0.5459, "step": 3406, "task_loss": 0.6158190965652466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37101471424102783, "epoch": 2.88, "learning_rate": 2.9428812945296465e-05, "loss": 0.7211, "step": 3407, "task_loss": 0.8972424268722534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7002087831497192, "epoch": 2.88, "learning_rate": 2.9422775027170636e-05, "loss": 0.7783, "step": 3408, "task_loss": 0.6444404125213623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4548088312149048, "epoch": 2.88, "learning_rate": 2.94167371090448e-05, "loss": 0.5573, "step": 3409, "task_loss": 0.43447262048721313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.437225341796875, "epoch": 2.88, "learning_rate": 2.9410699190918973e-05, "loss": 0.5836, "step": 3410, "task_loss": 0.48462316393852234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5605751276016235, "epoch": 2.88, "learning_rate": 2.9404661272793144e-05, "loss": 0.6641, "step": 3411, "task_loss": 1.3483930826187134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8081914186477661, "epoch": 2.88, "learning_rate": 2.939862335466731e-05, "loss": 0.7875, "step": 3412, "task_loss": 0.9629995822906494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6137567758560181, "epoch": 2.88, "learning_rate": 2.939258543654148e-05, "loss": 0.6513, "step": 3413, "task_loss": 0.9355418086051941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.480022132396698, "epoch": 2.89, "learning_rate": 2.9386547518415652e-05, "loss": 0.9328, "step": 3414, "task_loss": 1.194594144821167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6108629703521729, "epoch": 2.89, "learning_rate": 2.9380509600289823e-05, "loss": 0.8745, "step": 3415, "task_loss": 1.4879045486450195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45273736119270325, "epoch": 2.89, "learning_rate": 2.937447168216399e-05, "loss": 0.8364, "step": 3416, "task_loss": 0.8298737406730652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7358760237693787, "epoch": 2.89, "learning_rate": 2.936843376403816e-05, "loss": 0.8043, "step": 3417, "task_loss": 1.0173017978668213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1834475994110107, "epoch": 2.89, "learning_rate": 2.936239584591233e-05, "loss": 0.9238, "step": 3418, "task_loss": 0.9043486714363098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5684958696365356, "epoch": 2.89, "learning_rate": 2.9356357927786498e-05, "loss": 0.5193, "step": 3419, "task_loss": 0.7145360708236694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.123645544052124, "epoch": 2.89, "learning_rate": 2.935032000966067e-05, "loss": 0.8302, "step": 3420, "task_loss": 1.851700782775879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48065564036369324, "epoch": 2.89, "learning_rate": 2.9344282091534843e-05, "loss": 0.7259, "step": 3421, "task_loss": 1.1903222799301147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9967334866523743, "epoch": 2.89, "learning_rate": 2.9338244173409007e-05, "loss": 0.7928, "step": 3422, "task_loss": 1.459773302078247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6306959390640259, "epoch": 2.89, "learning_rate": 2.933220625528318e-05, "loss": 0.6586, "step": 3423, "task_loss": 0.5843677520751953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.805478572845459, "epoch": 2.89, "learning_rate": 2.932616833715735e-05, "loss": 0.8505, "step": 3424, "task_loss": 0.29496854543685913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6788901090621948, "epoch": 2.89, "learning_rate": 2.932013041903152e-05, "loss": 0.6658, "step": 3425, "task_loss": 1.5300517082214355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6280191540718079, "epoch": 2.9, "learning_rate": 2.931409250090569e-05, "loss": 0.6482, "step": 3426, "task_loss": 1.5265891551971436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.674058198928833, "epoch": 2.9, "learning_rate": 2.930805458277986e-05, "loss": 0.8421, "step": 3427, "task_loss": 0.6224533915519714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7258622646331787, "epoch": 2.9, "learning_rate": 2.930201666465403e-05, "loss": 0.7312, "step": 3428, "task_loss": 0.6626542210578918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7254615426063538, "epoch": 2.9, "learning_rate": 2.9295978746528197e-05, "loss": 0.5826, "step": 3429, "task_loss": 0.6370080709457397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1862752437591553, "epoch": 2.9, "learning_rate": 2.9289940828402368e-05, "loss": 0.8631, "step": 3430, "task_loss": 1.3147554397583008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46955108642578125, "epoch": 2.9, "learning_rate": 2.928390291027654e-05, "loss": 0.4554, "step": 3431, "task_loss": 0.48791080713272095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9962940216064453, "epoch": 2.9, "learning_rate": 2.9277864992150706e-05, "loss": 0.5759, "step": 3432, "task_loss": 1.6304450035095215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5472315549850464, "epoch": 2.9, "learning_rate": 2.9271827074024876e-05, "loss": 0.8826, "step": 3433, "task_loss": 0.4080277383327484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7875730991363525, "epoch": 2.9, "learning_rate": 2.9265789155899047e-05, "loss": 0.6279, "step": 3434, "task_loss": 0.977450966835022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7464276552200317, "epoch": 2.9, "learning_rate": 2.925975123777322e-05, "loss": 0.8775, "step": 3435, "task_loss": 1.0467785596847534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8669809103012085, "epoch": 2.9, "learning_rate": 2.9253713319647384e-05, "loss": 0.6656, "step": 3436, "task_loss": 1.655787467956543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5054278373718262, "epoch": 2.9, "learning_rate": 2.924767540152156e-05, "loss": 0.5236, "step": 3437, "task_loss": 0.638662576675415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8156613111495972, "epoch": 2.91, "learning_rate": 2.924163748339573e-05, "loss": 0.6833, "step": 3438, "task_loss": 0.41401582956314087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.825654923915863, "epoch": 2.91, "learning_rate": 2.9235599565269893e-05, "loss": 0.6207, "step": 3439, "task_loss": 1.8947583436965942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3961506485939026, "epoch": 2.91, "learning_rate": 2.9229561647144067e-05, "loss": 0.5925, "step": 3440, "task_loss": 0.7312431335449219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7383763790130615, "epoch": 2.91, "learning_rate": 2.9223523729018237e-05, "loss": 1.0785, "step": 3441, "task_loss": 1.5294227600097656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5205205678939819, "epoch": 2.91, "learning_rate": 2.9217485810892404e-05, "loss": 0.7443, "step": 3442, "task_loss": 0.22918947041034698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7278944253921509, "epoch": 2.91, "learning_rate": 2.9211447892766575e-05, "loss": 0.964, "step": 3443, "task_loss": 0.4356386661529541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9423568844795227, "epoch": 2.91, "learning_rate": 2.9205409974640746e-05, "loss": 0.676, "step": 3444, "task_loss": 0.5034884214401245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5563369989395142, "epoch": 2.91, "learning_rate": 2.9199372056514916e-05, "loss": 0.8046, "step": 3445, "task_loss": 1.4774986505508423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.576687216758728, "epoch": 2.91, "learning_rate": 2.9193334138389083e-05, "loss": 0.6555, "step": 3446, "task_loss": 0.343268483877182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0148389339447021, "epoch": 2.91, "learning_rate": 2.9187296220263254e-05, "loss": 0.6587, "step": 3447, "task_loss": 1.250695824623108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6466243267059326, "epoch": 2.91, "learning_rate": 2.9181258302137428e-05, "loss": 0.7111, "step": 3448, "task_loss": 0.7125418782234192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1003168821334839, "epoch": 2.91, "learning_rate": 2.917522038401159e-05, "loss": 0.8602, "step": 3449, "task_loss": 1.1838304996490479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.172728180885315, "epoch": 2.92, "learning_rate": 2.9169182465885762e-05, "loss": 0.7834, "step": 3450, "task_loss": 0.709757387638092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7840103507041931, "epoch": 2.92, "learning_rate": 2.9163144547759936e-05, "loss": 0.7143, "step": 3451, "task_loss": 1.2206426858901978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7525635957717896, "epoch": 2.92, "learning_rate": 2.91571066296341e-05, "loss": 0.7025, "step": 3452, "task_loss": 0.6732720732688904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38421669602394104, "epoch": 2.92, "learning_rate": 2.9151068711508274e-05, "loss": 0.6318, "step": 3453, "task_loss": 0.5469582080841064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8543015718460083, "epoch": 2.92, "learning_rate": 2.9145030793382445e-05, "loss": 0.8314, "step": 3454, "task_loss": 0.6467356085777283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7180390357971191, "epoch": 2.92, "learning_rate": 2.9138992875256615e-05, "loss": 0.7665, "step": 3455, "task_loss": 2.445740222930908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6109917163848877, "epoch": 2.92, "learning_rate": 2.9132954957130782e-05, "loss": 0.7538, "step": 3456, "task_loss": 0.9847996234893799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7462831735610962, "epoch": 2.92, "learning_rate": 2.9126917039004953e-05, "loss": 0.7839, "step": 3457, "task_loss": 0.698974072933197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8333162069320679, "epoch": 2.92, "learning_rate": 2.9120879120879123e-05, "loss": 0.7337, "step": 3458, "task_loss": 1.292702078819275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7255497574806213, "epoch": 2.92, "learning_rate": 2.911484120275329e-05, "loss": 0.6461, "step": 3459, "task_loss": 0.5546559691429138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8517965078353882, "epoch": 2.92, "learning_rate": 2.910880328462746e-05, "loss": 0.6971, "step": 3460, "task_loss": 0.7008397579193115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4233158230781555, "epoch": 2.93, "learning_rate": 2.9102765366501632e-05, "loss": 0.47, "step": 3461, "task_loss": 0.22391372919082642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47498345375061035, "epoch": 2.93, "learning_rate": 2.90967274483758e-05, "loss": 0.5993, "step": 3462, "task_loss": 1.7497204542160034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6662006974220276, "epoch": 2.93, "learning_rate": 2.909068953024997e-05, "loss": 0.7446, "step": 3463, "task_loss": 1.8353526592254639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9654672145843506, "epoch": 2.93, "learning_rate": 2.9084651612124143e-05, "loss": 0.8181, "step": 3464, "task_loss": 0.6161485910415649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.53838050365448, "epoch": 2.93, "learning_rate": 2.9078613693998314e-05, "loss": 0.6078, "step": 3465, "task_loss": 0.3643706142902374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.568381667137146, "epoch": 2.93, "learning_rate": 2.9072575775872478e-05, "loss": 0.7643, "step": 3466, "task_loss": 0.3740490972995758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5097825527191162, "epoch": 2.93, "learning_rate": 2.9066537857746652e-05, "loss": 0.599, "step": 3467, "task_loss": 0.5570055246353149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6624899506568909, "epoch": 2.93, "learning_rate": 2.9060499939620822e-05, "loss": 0.6175, "step": 3468, "task_loss": 0.5191022157669067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9154953360557556, "epoch": 2.93, "learning_rate": 2.905446202149499e-05, "loss": 0.7352, "step": 3469, "task_loss": 1.5485376119613647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7692798376083374, "epoch": 2.93, "learning_rate": 2.904842410336916e-05, "loss": 0.8144, "step": 3470, "task_loss": 1.2589569091796875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7795748710632324, "epoch": 2.93, "learning_rate": 2.904238618524333e-05, "loss": 0.9128, "step": 3471, "task_loss": 0.9293898344039917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.489696741104126, "epoch": 2.93, "learning_rate": 2.9036348267117498e-05, "loss": 0.8576, "step": 3472, "task_loss": 0.35216429829597473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6539574265480042, "epoch": 2.94, "learning_rate": 2.903031034899167e-05, "loss": 0.638, "step": 3473, "task_loss": 0.7694327235221863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7084947228431702, "epoch": 2.94, "learning_rate": 2.902427243086584e-05, "loss": 0.6679, "step": 3474, "task_loss": 1.428110957145691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6698046922683716, "epoch": 2.94, "learning_rate": 2.901823451274001e-05, "loss": 0.8171, "step": 3475, "task_loss": 0.4634069800376892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6663632392883301, "epoch": 2.94, "learning_rate": 2.9012196594614177e-05, "loss": 0.6838, "step": 3476, "task_loss": 0.6905601024627686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7209047079086304, "epoch": 2.94, "learning_rate": 2.9006158676488347e-05, "loss": 0.6253, "step": 3477, "task_loss": 0.6825904250144958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47803089022636414, "epoch": 2.94, "learning_rate": 2.900012075836252e-05, "loss": 0.6214, "step": 3478, "task_loss": 0.5431719422340393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.798856794834137, "epoch": 2.94, "learning_rate": 2.8994082840236685e-05, "loss": 0.7845, "step": 3479, "task_loss": 1.5912142992019653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3649296760559082, "epoch": 2.94, "learning_rate": 2.898804492211086e-05, "loss": 0.8058, "step": 3480, "task_loss": 1.0541822910308838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7737277746200562, "epoch": 2.94, "learning_rate": 2.898200700398503e-05, "loss": 0.9209, "step": 3481, "task_loss": 1.071914553642273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7200210094451904, "epoch": 2.94, "learning_rate": 2.8975969085859193e-05, "loss": 0.7199, "step": 3482, "task_loss": 0.45951420068740845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42172491550445557, "epoch": 2.94, "learning_rate": 2.8969931167733367e-05, "loss": 0.6139, "step": 3483, "task_loss": 0.35511523485183716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7546212673187256, "epoch": 2.94, "learning_rate": 2.8963893249607538e-05, "loss": 0.7285, "step": 3484, "task_loss": 1.1285014152526855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9378154277801514, "epoch": 2.95, "learning_rate": 2.895785533148171e-05, "loss": 0.7675, "step": 3485, "task_loss": 1.2741862535476685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5178618431091309, "epoch": 2.95, "learning_rate": 2.8951817413355876e-05, "loss": 0.8078, "step": 3486, "task_loss": 0.6588294506072998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2086021900177002, "epoch": 2.95, "learning_rate": 2.8945779495230046e-05, "loss": 0.8567, "step": 3487, "task_loss": 1.3689804077148438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47692954540252686, "epoch": 2.95, "learning_rate": 2.8939741577104217e-05, "loss": 0.6614, "step": 3488, "task_loss": 0.659242570400238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9082344174385071, "epoch": 2.95, "learning_rate": 2.8933703658978384e-05, "loss": 0.7667, "step": 3489, "task_loss": 1.9606081247329712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5256115198135376, "epoch": 2.95, "learning_rate": 2.8927665740852555e-05, "loss": 0.4659, "step": 3490, "task_loss": 0.32639172673225403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5164555907249451, "epoch": 2.95, "learning_rate": 2.8921627822726725e-05, "loss": 0.6752, "step": 3491, "task_loss": 0.45916610956192017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.601751446723938, "epoch": 2.95, "learning_rate": 2.8915589904600892e-05, "loss": 0.6905, "step": 3492, "task_loss": 0.66357421875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46919217705726624, "epoch": 2.95, "learning_rate": 2.8909551986475063e-05, "loss": 0.5786, "step": 3493, "task_loss": 0.2836116850376129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0456271171569824, "epoch": 2.95, "learning_rate": 2.8903514068349237e-05, "loss": 0.9233, "step": 3494, "task_loss": 1.2850019931793213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4371858537197113, "epoch": 2.95, "learning_rate": 2.8897476150223407e-05, "loss": 0.53, "step": 3495, "task_loss": 0.8951230645179749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.784016489982605, "epoch": 2.95, "learning_rate": 2.889143823209757e-05, "loss": 0.7503, "step": 3496, "task_loss": 1.0585284233093262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9110751748085022, "epoch": 2.96, "learning_rate": 2.8885400313971745e-05, "loss": 0.7518, "step": 3497, "task_loss": 1.1744385957717896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8005282878875732, "epoch": 2.96, "learning_rate": 2.8879362395845916e-05, "loss": 0.7192, "step": 3498, "task_loss": 1.1871466636657715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9272075891494751, "epoch": 2.96, "learning_rate": 2.8873324477720083e-05, "loss": 0.908, "step": 3499, "task_loss": 0.5407767295837402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7700062990188599, "epoch": 2.96, "learning_rate": 2.8867286559594254e-05, "loss": 0.6472, "step": 3500, "task_loss": 1.0530611276626587 }, { "epoch": 2.96, "eval_accuracy": 0.8907722772277228, "eval_loss": 0.43368417024612427, "eval_runtime": 227.1671, "eval_samples_per_second": 111.152, "eval_steps_per_second": 0.872, "step": 3500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5895242691040039, "epoch": 2.96, "learning_rate": 2.8861248641468424e-05, "loss": 0.5867, "step": 3501, "task_loss": 1.4885001182556152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6212030649185181, "epoch": 2.96, "learning_rate": 2.885521072334259e-05, "loss": 0.7024, "step": 3502, "task_loss": 0.5673592686653137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7216736078262329, "epoch": 2.96, "learning_rate": 2.8849172805216762e-05, "loss": 0.5715, "step": 3503, "task_loss": 0.2849804162979126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8654711842536926, "epoch": 2.96, "learning_rate": 2.8843134887090932e-05, "loss": 0.7444, "step": 3504, "task_loss": 1.7764543294906616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35397881269454956, "epoch": 2.96, "learning_rate": 2.8837096968965106e-05, "loss": 0.5267, "step": 3505, "task_loss": 1.782389521598816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0136513710021973, "epoch": 2.96, "learning_rate": 2.883105905083927e-05, "loss": 0.6698, "step": 3506, "task_loss": 1.268263816833496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7251628637313843, "epoch": 2.96, "learning_rate": 2.882502113271344e-05, "loss": 0.8858, "step": 3507, "task_loss": 1.0930500030517578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3958415985107422, "epoch": 2.96, "learning_rate": 2.8818983214587615e-05, "loss": 0.8625, "step": 3508, "task_loss": 0.7988516092300415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3297687768936157, "epoch": 2.97, "learning_rate": 2.881294529646178e-05, "loss": 0.9801, "step": 3509, "task_loss": 1.4945991039276123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8767632842063904, "epoch": 2.97, "learning_rate": 2.8806907378335952e-05, "loss": 0.7777, "step": 3510, "task_loss": 2.163203001022339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6318197250366211, "epoch": 2.97, "learning_rate": 2.8800869460210123e-05, "loss": 1.0891, "step": 3511, "task_loss": 1.8774516582489014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6436439752578735, "epoch": 2.97, "learning_rate": 2.8794831542084287e-05, "loss": 0.7685, "step": 3512, "task_loss": 0.9692749977111816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6486576199531555, "epoch": 2.97, "learning_rate": 2.878879362395846e-05, "loss": 0.7953, "step": 3513, "task_loss": 1.781054139137268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5908812284469604, "epoch": 2.97, "learning_rate": 2.878275570583263e-05, "loss": 0.6084, "step": 3514, "task_loss": 0.10436081141233444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6379696726799011, "epoch": 2.97, "learning_rate": 2.8776717787706802e-05, "loss": 0.8485, "step": 3515, "task_loss": 1.4927537441253662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8152763843536377, "epoch": 2.97, "learning_rate": 2.877067986958097e-05, "loss": 0.6518, "step": 3516, "task_loss": 0.7280385494232178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39982518553733826, "epoch": 2.97, "learning_rate": 2.876464195145514e-05, "loss": 0.6249, "step": 3517, "task_loss": 0.5262916684150696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5574002265930176, "epoch": 2.97, "learning_rate": 2.875860403332931e-05, "loss": 0.5957, "step": 3518, "task_loss": 0.5144563317298889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7622504234313965, "epoch": 2.97, "learning_rate": 2.8752566115203477e-05, "loss": 0.6903, "step": 3519, "task_loss": 0.745737612247467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.472598135471344, "epoch": 2.97, "learning_rate": 2.8746528197077648e-05, "loss": 0.6826, "step": 3520, "task_loss": 0.24479055404663086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.291789174079895, "epoch": 2.98, "learning_rate": 2.8740490278951822e-05, "loss": 0.6862, "step": 3521, "task_loss": 1.5910961627960205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8099395632743835, "epoch": 2.98, "learning_rate": 2.8734452360825986e-05, "loss": 0.8086, "step": 3522, "task_loss": 0.39356908202171326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7453854084014893, "epoch": 2.98, "learning_rate": 2.8728414442700156e-05, "loss": 0.6738, "step": 3523, "task_loss": 0.7309139370918274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7587268352508545, "epoch": 2.98, "learning_rate": 2.872237652457433e-05, "loss": 0.682, "step": 3524, "task_loss": 0.7517848610877991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0082471370697021, "epoch": 2.98, "learning_rate": 2.8716338606448494e-05, "loss": 0.7788, "step": 3525, "task_loss": 0.5595014095306396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9076305627822876, "epoch": 2.98, "learning_rate": 2.8710300688322668e-05, "loss": 0.842, "step": 3526, "task_loss": 0.5062964558601379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5072662830352783, "epoch": 2.98, "learning_rate": 2.870426277019684e-05, "loss": 0.6921, "step": 3527, "task_loss": 1.0915141105651855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9681079983711243, "epoch": 2.98, "learning_rate": 2.869822485207101e-05, "loss": 0.7579, "step": 3528, "task_loss": 0.9980337619781494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.5782496929168701, "epoch": 2.98, "learning_rate": 2.8692186933945176e-05, "loss": 0.7787, "step": 3529, "task_loss": 1.2713779211044312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7345553636550903, "epoch": 2.98, "learning_rate": 2.8686149015819347e-05, "loss": 0.7156, "step": 3530, "task_loss": 1.9231317043304443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45823144912719727, "epoch": 2.98, "learning_rate": 2.8680111097693518e-05, "loss": 0.7447, "step": 3531, "task_loss": 0.2629527449607849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5715450048446655, "epoch": 2.99, "learning_rate": 2.8674073179567685e-05, "loss": 0.6841, "step": 3532, "task_loss": 0.4190598428249359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8194421529769897, "epoch": 2.99, "learning_rate": 2.8668035261441855e-05, "loss": 0.7353, "step": 3533, "task_loss": 1.7012922763824463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6091118454933167, "epoch": 2.99, "learning_rate": 2.8661997343316026e-05, "loss": 0.702, "step": 3534, "task_loss": 0.5715523362159729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.715003252029419, "epoch": 2.99, "learning_rate": 2.8655959425190193e-05, "loss": 0.8046, "step": 3535, "task_loss": 0.8012397885322571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5494982004165649, "epoch": 2.99, "learning_rate": 2.8649921507064364e-05, "loss": 0.7831, "step": 3536, "task_loss": 0.5300018191337585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5259543657302856, "epoch": 2.99, "learning_rate": 2.8643883588938538e-05, "loss": 0.5291, "step": 3537, "task_loss": 1.088890552520752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5186984539031982, "epoch": 2.99, "learning_rate": 2.8637845670812708e-05, "loss": 0.619, "step": 3538, "task_loss": 0.8404145240783691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8344303369522095, "epoch": 2.99, "learning_rate": 2.8631807752686872e-05, "loss": 0.6524, "step": 3539, "task_loss": 1.8482786417007446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5549348592758179, "epoch": 2.99, "learning_rate": 2.8625769834561046e-05, "loss": 0.7255, "step": 3540, "task_loss": 1.7015589475631714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5830883383750916, "epoch": 2.99, "learning_rate": 2.8619731916435216e-05, "loss": 0.5847, "step": 3541, "task_loss": 0.8835998177528381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0474164485931396, "epoch": 2.99, "learning_rate": 2.8613693998309384e-05, "loss": 0.8207, "step": 3542, "task_loss": 0.7856638431549072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6131393909454346, "epoch": 2.99, "learning_rate": 2.8607656080183554e-05, "loss": 0.6238, "step": 3543, "task_loss": 1.630849838256836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46714574098587036, "epoch": 3.0, "learning_rate": 2.8601618162057725e-05, "loss": 0.8634, "step": 3544, "task_loss": 0.7398757934570312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41894951462745667, "epoch": 3.0, "learning_rate": 2.8595580243931892e-05, "loss": 0.5324, "step": 3545, "task_loss": 0.567123293876648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2912333011627197, "epoch": 3.0, "learning_rate": 2.8589542325806063e-05, "loss": 0.8322, "step": 3546, "task_loss": 1.039556860923767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6628650426864624, "epoch": 3.0, "learning_rate": 2.8583504407680233e-05, "loss": 0.6427, "step": 3547, "task_loss": 0.7430834174156189 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7094494104385376, "epoch": 3.0, "learning_rate": 2.8577466489554404e-05, "loss": 0.5733, "step": 3548, "task_loss": 0.2793852984905243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8963229060173035, "epoch": 3.0, "learning_rate": 2.857142857142857e-05, "loss": 0.6604, "step": 3549, "task_loss": 0.2541487216949463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6640872955322266, "epoch": 3.0, "learning_rate": 2.856539065330274e-05, "loss": 1.4603, "step": 3550, "task_loss": 0.9503651857376099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4315778613090515, "epoch": 3.0, "learning_rate": 2.8559352735176915e-05, "loss": 0.5902, "step": 3551, "task_loss": 0.6308757066726685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8534530401229858, "epoch": 3.0, "learning_rate": 2.855331481705108e-05, "loss": 0.63, "step": 3552, "task_loss": 0.3486424684524536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7001720666885376, "epoch": 3.0, "learning_rate": 2.8547276898925253e-05, "loss": 0.5578, "step": 3553, "task_loss": 0.9340201616287231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8621876835823059, "epoch": 3.0, "learning_rate": 2.8541238980799424e-05, "loss": 0.6507, "step": 3554, "task_loss": 0.7576887607574463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4352402091026306, "epoch": 3.01, "learning_rate": 2.8535201062673588e-05, "loss": 0.6523, "step": 3555, "task_loss": 0.4721548557281494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5970913171768188, "epoch": 3.01, "learning_rate": 2.852916314454776e-05, "loss": 0.5143, "step": 3556, "task_loss": 0.7570653557777405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6047800779342651, "epoch": 3.01, "learning_rate": 2.8523125226421932e-05, "loss": 0.6389, "step": 3557, "task_loss": 0.46549686789512634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.55394446849823, "epoch": 3.01, "learning_rate": 2.8517087308296103e-05, "loss": 0.7055, "step": 3558, "task_loss": 0.8493452072143555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5711374878883362, "epoch": 3.01, "learning_rate": 2.851104939017027e-05, "loss": 0.9064, "step": 3559, "task_loss": 0.6857057809829712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7787516117095947, "epoch": 3.01, "learning_rate": 2.850501147204444e-05, "loss": 0.6255, "step": 3560, "task_loss": 1.4883779287338257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5478712320327759, "epoch": 3.01, "learning_rate": 2.849897355391861e-05, "loss": 0.7224, "step": 3561, "task_loss": 1.5161023139953613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7135688066482544, "epoch": 3.01, "learning_rate": 2.8492935635792778e-05, "loss": 0.6565, "step": 3562, "task_loss": 0.3719140291213989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1531177759170532, "epoch": 3.01, "learning_rate": 2.848689771766695e-05, "loss": 0.8902, "step": 3563, "task_loss": 0.7203177213668823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5592091083526611, "epoch": 3.01, "learning_rate": 2.848085979954112e-05, "loss": 0.76, "step": 3564, "task_loss": 0.7370966076850891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5997725129127502, "epoch": 3.01, "learning_rate": 2.8474821881415286e-05, "loss": 0.735, "step": 3565, "task_loss": 1.2105438709259033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9959639310836792, "epoch": 3.01, "learning_rate": 2.8468783963289457e-05, "loss": 0.616, "step": 3566, "task_loss": 0.43924281001091003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6430736780166626, "epoch": 3.02, "learning_rate": 2.846274604516363e-05, "loss": 0.8501, "step": 3567, "task_loss": 1.0086430311203003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8296298384666443, "epoch": 3.02, "learning_rate": 2.84567081270378e-05, "loss": 0.7352, "step": 3568, "task_loss": 1.684377908706665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5806262493133545, "epoch": 3.02, "learning_rate": 2.8450670208911965e-05, "loss": 0.7075, "step": 3569, "task_loss": 0.48733314871788025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1771345138549805, "epoch": 3.02, "learning_rate": 2.844463229078614e-05, "loss": 0.6196, "step": 3570, "task_loss": 1.2887576818466187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32745856046676636, "epoch": 3.02, "learning_rate": 2.843859437266031e-05, "loss": 0.5654, "step": 3571, "task_loss": 0.4443657100200653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8458269238471985, "epoch": 3.02, "learning_rate": 2.8432556454534477e-05, "loss": 0.837, "step": 3572, "task_loss": 0.7712803483009338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2610406875610352, "epoch": 3.02, "learning_rate": 2.8426518536408648e-05, "loss": 0.6469, "step": 3573, "task_loss": 1.287882924079895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3581000864505768, "epoch": 3.02, "learning_rate": 2.8420480618282818e-05, "loss": 0.5144, "step": 3574, "task_loss": 0.49181362986564636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6357303857803345, "epoch": 3.02, "learning_rate": 2.8414442700156985e-05, "loss": 0.819, "step": 3575, "task_loss": 1.0158765316009521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7117151618003845, "epoch": 3.02, "learning_rate": 2.8408404782031156e-05, "loss": 0.5709, "step": 3576, "task_loss": 0.8816638588905334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7645248174667358, "epoch": 3.02, "learning_rate": 2.8402366863905327e-05, "loss": 0.6262, "step": 3577, "task_loss": 0.3988209068775177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38314932584762573, "epoch": 3.02, "learning_rate": 2.83963289457795e-05, "loss": 0.7823, "step": 3578, "task_loss": 0.32058268785476685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6896148920059204, "epoch": 3.03, "learning_rate": 2.8390291027653664e-05, "loss": 0.5591, "step": 3579, "task_loss": 0.6103711128234863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4302940368652344, "epoch": 3.03, "learning_rate": 2.8384253109527835e-05, "loss": 0.6298, "step": 3580, "task_loss": 0.5452830195426941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.586306095123291, "epoch": 3.03, "learning_rate": 2.837821519140201e-05, "loss": 0.7412, "step": 3581, "task_loss": 0.9802173972129822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5140480995178223, "epoch": 3.03, "learning_rate": 2.8372177273276173e-05, "loss": 0.5335, "step": 3582, "task_loss": 0.6141536235809326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6005988717079163, "epoch": 3.03, "learning_rate": 2.8366139355150347e-05, "loss": 0.8635, "step": 3583, "task_loss": 1.576610803604126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7433828115463257, "epoch": 3.03, "learning_rate": 2.8360101437024517e-05, "loss": 0.5687, "step": 3584, "task_loss": 0.8613354563713074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3786167502403259, "epoch": 3.03, "learning_rate": 2.835406351889868e-05, "loss": 0.613, "step": 3585, "task_loss": 0.6103795766830444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8002123832702637, "epoch": 3.03, "learning_rate": 2.8348025600772855e-05, "loss": 0.6844, "step": 3586, "task_loss": 0.6950814723968506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3226242661476135, "epoch": 3.03, "learning_rate": 2.8341987682647025e-05, "loss": 0.7632, "step": 3587, "task_loss": 1.1891282796859741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.679539680480957, "epoch": 3.03, "learning_rate": 2.8335949764521196e-05, "loss": 0.7384, "step": 3588, "task_loss": 1.099945306777954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4051525592803955, "epoch": 3.03, "learning_rate": 2.8329911846395363e-05, "loss": 0.5794, "step": 3589, "task_loss": 0.39798703789711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0923786163330078, "epoch": 3.03, "learning_rate": 2.8323873928269534e-05, "loss": 0.791, "step": 3590, "task_loss": 1.001700520515442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8324576020240784, "epoch": 3.04, "learning_rate": 2.8317836010143704e-05, "loss": 0.7296, "step": 3591, "task_loss": 1.239029049873352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7699583172798157, "epoch": 3.04, "learning_rate": 2.831179809201787e-05, "loss": 0.7131, "step": 3592, "task_loss": 1.0743050575256348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6002956628799438, "epoch": 3.04, "learning_rate": 2.8305760173892042e-05, "loss": 0.6503, "step": 3593, "task_loss": 0.3002236485481262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3624074459075928, "epoch": 3.04, "learning_rate": 2.8299722255766216e-05, "loss": 0.7515, "step": 3594, "task_loss": 1.0848666429519653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6987748146057129, "epoch": 3.04, "learning_rate": 2.829368433764038e-05, "loss": 0.6866, "step": 3595, "task_loss": 0.7544407248497009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6010640859603882, "epoch": 3.04, "learning_rate": 2.828764641951455e-05, "loss": 0.4936, "step": 3596, "task_loss": 0.6365046501159668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8547073006629944, "epoch": 3.04, "learning_rate": 2.8281608501388724e-05, "loss": 0.6264, "step": 3597, "task_loss": 1.7877731323242188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5024874210357666, "epoch": 3.04, "learning_rate": 2.8275570583262895e-05, "loss": 0.5506, "step": 3598, "task_loss": 0.9955921769142151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7183303833007812, "epoch": 3.04, "learning_rate": 2.8269532665137062e-05, "loss": 0.6703, "step": 3599, "task_loss": 1.0533134937286377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.161064624786377, "epoch": 3.04, "learning_rate": 2.8263494747011233e-05, "loss": 0.8192, "step": 3600, "task_loss": 0.5414749979972839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.798387348651886, "epoch": 3.04, "learning_rate": 2.8257456828885403e-05, "loss": 0.644, "step": 3601, "task_loss": 1.8769289255142212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9048281311988831, "epoch": 3.04, "learning_rate": 2.825141891075957e-05, "loss": 0.9795, "step": 3602, "task_loss": 2.0014753341674805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.664453387260437, "epoch": 3.05, "learning_rate": 2.824538099263374e-05, "loss": 0.5806, "step": 3603, "task_loss": 0.8743603229522705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9036272764205933, "epoch": 3.05, "learning_rate": 2.823934307450791e-05, "loss": 0.6876, "step": 3604, "task_loss": 0.4418850243091583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8576907515525818, "epoch": 3.05, "learning_rate": 2.823330515638208e-05, "loss": 0.5589, "step": 3605, "task_loss": 0.3267512023448944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4059901535511017, "epoch": 3.05, "learning_rate": 2.822726723825625e-05, "loss": 0.4068, "step": 3606, "task_loss": 0.662968099117279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8640851974487305, "epoch": 3.05, "learning_rate": 2.822122932013042e-05, "loss": 0.5747, "step": 3607, "task_loss": 0.3103358745574951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1322784423828125, "epoch": 3.05, "learning_rate": 2.8215191402004594e-05, "loss": 0.8231, "step": 3608, "task_loss": 0.7384780049324036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45259132981300354, "epoch": 3.05, "learning_rate": 2.8209153483878758e-05, "loss": 0.6312, "step": 3609, "task_loss": 0.5664212107658386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.254497230052948, "epoch": 3.05, "learning_rate": 2.820311556575293e-05, "loss": 0.5668, "step": 3610, "task_loss": 0.4529813528060913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6295821666717529, "epoch": 3.05, "learning_rate": 2.8197077647627102e-05, "loss": 0.5766, "step": 3611, "task_loss": 0.6987727284431458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49692821502685547, "epoch": 3.05, "learning_rate": 2.8191039729501266e-05, "loss": 0.5519, "step": 3612, "task_loss": 0.4025169014930725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30930906534194946, "epoch": 3.05, "learning_rate": 2.818500181137544e-05, "loss": 0.5756, "step": 3613, "task_loss": 0.2295258641242981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6490362286567688, "epoch": 3.05, "learning_rate": 2.817896389324961e-05, "loss": 0.622, "step": 3614, "task_loss": 0.7238801121711731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30780038237571716, "epoch": 3.06, "learning_rate": 2.8172925975123778e-05, "loss": 0.5441, "step": 3615, "task_loss": 0.4116123616695404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.535415530204773, "epoch": 3.06, "learning_rate": 2.8166888056997948e-05, "loss": 0.6106, "step": 3616, "task_loss": 0.33095329999923706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7923001050949097, "epoch": 3.06, "learning_rate": 2.816085013887212e-05, "loss": 0.6397, "step": 3617, "task_loss": 1.055081844329834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.886846661567688, "epoch": 3.06, "learning_rate": 2.815481222074629e-05, "loss": 0.6598, "step": 3618, "task_loss": 0.8735306859016418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4023129343986511, "epoch": 3.06, "learning_rate": 2.8148774302620457e-05, "loss": 0.7718, "step": 3619, "task_loss": 0.09780261665582657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3630629777908325, "epoch": 3.06, "learning_rate": 2.8142736384494627e-05, "loss": 0.5759, "step": 3620, "task_loss": 0.12758219242095947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8883981704711914, "epoch": 3.06, "learning_rate": 2.8136698466368798e-05, "loss": 0.7524, "step": 3621, "task_loss": 0.5994730591773987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5292114019393921, "epoch": 3.06, "learning_rate": 2.8130660548242965e-05, "loss": 0.7045, "step": 3622, "task_loss": 0.9696571230888367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6077001094818115, "epoch": 3.06, "learning_rate": 2.8124622630117136e-05, "loss": 0.5493, "step": 3623, "task_loss": 0.6632014513015747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5201268196105957, "epoch": 3.06, "learning_rate": 2.811858471199131e-05, "loss": 0.7625, "step": 3624, "task_loss": 0.8669519424438477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7835447192192078, "epoch": 3.06, "learning_rate": 2.8112546793865473e-05, "loss": 0.6, "step": 3625, "task_loss": 1.0663723945617676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8852217197418213, "epoch": 3.07, "learning_rate": 2.8106508875739644e-05, "loss": 0.6817, "step": 3626, "task_loss": 1.1238359212875366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6591657996177673, "epoch": 3.07, "learning_rate": 2.8100470957613818e-05, "loss": 0.6157, "step": 3627, "task_loss": 0.3449137806892395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9209223985671997, "epoch": 3.07, "learning_rate": 2.809443303948799e-05, "loss": 0.6752, "step": 3628, "task_loss": 0.5252447724342346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3900834619998932, "epoch": 3.07, "learning_rate": 2.8088395121362156e-05, "loss": 0.502, "step": 3629, "task_loss": 0.5338437557220459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7916116118431091, "epoch": 3.07, "learning_rate": 2.8082357203236326e-05, "loss": 0.6483, "step": 3630, "task_loss": 0.6517319679260254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9732638597488403, "epoch": 3.07, "learning_rate": 2.8076319285110497e-05, "loss": 0.7296, "step": 3631, "task_loss": 1.3063803911209106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8414730429649353, "epoch": 3.07, "learning_rate": 2.8070281366984664e-05, "loss": 0.6335, "step": 3632, "task_loss": 0.30291834473609924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7128413915634155, "epoch": 3.07, "learning_rate": 2.8064243448858834e-05, "loss": 0.7648, "step": 3633, "task_loss": 1.2600635290145874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.767766535282135, "epoch": 3.07, "learning_rate": 2.8058205530733005e-05, "loss": 0.6109, "step": 3634, "task_loss": 0.4626900851726532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5590154528617859, "epoch": 3.07, "learning_rate": 2.8052167612607172e-05, "loss": 0.7046, "step": 3635, "task_loss": 0.4575844705104828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.658909797668457, "epoch": 3.07, "learning_rate": 2.8046129694481343e-05, "loss": 0.5722, "step": 3636, "task_loss": 0.2537018060684204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3340317904949188, "epoch": 3.07, "learning_rate": 2.8040091776355513e-05, "loss": 0.5649, "step": 3637, "task_loss": 0.9174350500106812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6891013979911804, "epoch": 3.08, "learning_rate": 2.8034053858229687e-05, "loss": 0.7036, "step": 3638, "task_loss": 0.6399855017662048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5071496963500977, "epoch": 3.08, "learning_rate": 2.802801594010385e-05, "loss": 0.569, "step": 3639, "task_loss": 0.21990296244621277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8344600796699524, "epoch": 3.08, "learning_rate": 2.8021978021978025e-05, "loss": 0.6986, "step": 3640, "task_loss": 1.393924355506897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7259501218795776, "epoch": 3.08, "learning_rate": 2.8015940103852196e-05, "loss": 0.7242, "step": 3641, "task_loss": 0.33193981647491455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.67268306016922, "epoch": 3.08, "learning_rate": 2.800990218572636e-05, "loss": 0.6721, "step": 3642, "task_loss": 1.0997542142868042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6848774552345276, "epoch": 3.08, "learning_rate": 2.8003864267600533e-05, "loss": 0.6455, "step": 3643, "task_loss": 0.5901796221733093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.528223991394043, "epoch": 3.08, "learning_rate": 2.7997826349474704e-05, "loss": 0.5825, "step": 3644, "task_loss": 0.5343807935714722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6142597794532776, "epoch": 3.08, "learning_rate": 2.799178843134887e-05, "loss": 0.4828, "step": 3645, "task_loss": 0.19231250882148743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4365150034427643, "epoch": 3.08, "learning_rate": 2.7985750513223042e-05, "loss": 0.5505, "step": 3646, "task_loss": 0.5565472841262817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6380513906478882, "epoch": 3.08, "learning_rate": 2.7979712595097212e-05, "loss": 0.7049, "step": 3647, "task_loss": 0.6846076250076294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3350088894367218, "epoch": 3.08, "learning_rate": 2.7973674676971383e-05, "loss": 0.7559, "step": 3648, "task_loss": 0.790398895740509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.721883237361908, "epoch": 3.08, "learning_rate": 2.796763675884555e-05, "loss": 0.6988, "step": 3649, "task_loss": 0.4710249900817871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7126713395118713, "epoch": 3.09, "learning_rate": 2.796159884071972e-05, "loss": 0.62, "step": 3650, "task_loss": 1.6085669994354248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48656484484672546, "epoch": 3.09, "learning_rate": 2.7955560922593895e-05, "loss": 0.8002, "step": 3651, "task_loss": 0.7507634162902832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.118462324142456, "epoch": 3.09, "learning_rate": 2.794952300446806e-05, "loss": 0.9007, "step": 3652, "task_loss": 1.2093141078948975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7298305034637451, "epoch": 3.09, "learning_rate": 2.794348508634223e-05, "loss": 0.6903, "step": 3653, "task_loss": 0.8094022870063782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0075494050979614, "epoch": 3.09, "learning_rate": 2.7937447168216403e-05, "loss": 0.9123, "step": 3654, "task_loss": 1.181075096130371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6745384931564331, "epoch": 3.09, "learning_rate": 2.7931409250090567e-05, "loss": 0.6325, "step": 3655, "task_loss": 0.7473085522651672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2099676132202148, "epoch": 3.09, "learning_rate": 2.792537133196474e-05, "loss": 0.7002, "step": 3656, "task_loss": 1.2366188764572144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5508182048797607, "epoch": 3.09, "learning_rate": 2.791933341383891e-05, "loss": 0.8052, "step": 3657, "task_loss": 0.5048431158065796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7220621109008789, "epoch": 3.09, "learning_rate": 2.7913295495713082e-05, "loss": 0.8288, "step": 3658, "task_loss": 0.9081284999847412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.644292414188385, "epoch": 3.09, "learning_rate": 2.790725757758725e-05, "loss": 0.4916, "step": 3659, "task_loss": 0.505931556224823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0231115818023682, "epoch": 3.09, "learning_rate": 2.790121965946142e-05, "loss": 0.6745, "step": 3660, "task_loss": 0.6946086287498474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8683412671089172, "epoch": 3.09, "learning_rate": 2.789518174133559e-05, "loss": 0.674, "step": 3661, "task_loss": 1.1246572732925415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5513156652450562, "epoch": 3.1, "learning_rate": 2.7889143823209757e-05, "loss": 0.6882, "step": 3662, "task_loss": 1.1743996143341064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4099823832511902, "epoch": 3.1, "learning_rate": 2.7883105905083928e-05, "loss": 0.6084, "step": 3663, "task_loss": 0.9822730422019958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6249244809150696, "epoch": 3.1, "learning_rate": 2.78770679869581e-05, "loss": 0.8708, "step": 3664, "task_loss": 1.5089528560638428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8995240926742554, "epoch": 3.1, "learning_rate": 2.7871030068832266e-05, "loss": 0.7189, "step": 3665, "task_loss": 0.6115638613700867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7424576282501221, "epoch": 3.1, "learning_rate": 2.7864992150706436e-05, "loss": 0.5726, "step": 3666, "task_loss": 0.438534140586853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5751792192459106, "epoch": 3.1, "learning_rate": 2.785895423258061e-05, "loss": 0.5997, "step": 3667, "task_loss": 0.40415582060813904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3761516213417053, "epoch": 3.1, "learning_rate": 2.785291631445478e-05, "loss": 0.6507, "step": 3668, "task_loss": 0.13075126707553864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5189539790153503, "epoch": 3.1, "learning_rate": 2.7846878396328945e-05, "loss": 0.5213, "step": 3669, "task_loss": 0.2792453467845917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4724988341331482, "epoch": 3.1, "learning_rate": 2.784084047820312e-05, "loss": 0.5688, "step": 3670, "task_loss": 0.7488003969192505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4064655303955078, "epoch": 3.1, "learning_rate": 2.783480256007729e-05, "loss": 0.509, "step": 3671, "task_loss": 0.7058221697807312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6107677221298218, "epoch": 3.1, "learning_rate": 2.7828764641951456e-05, "loss": 0.4979, "step": 3672, "task_loss": 0.29844528436660767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1922931671142578, "epoch": 3.1, "learning_rate": 2.7822726723825627e-05, "loss": 0.8774, "step": 3673, "task_loss": 0.5935751795768738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2958366870880127, "epoch": 3.11, "learning_rate": 2.7816688805699797e-05, "loss": 0.6235, "step": 3674, "task_loss": 0.2845328152179718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46887263655662537, "epoch": 3.11, "learning_rate": 2.7810650887573965e-05, "loss": 0.7092, "step": 3675, "task_loss": 0.3006347715854645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5558429956436157, "epoch": 3.11, "learning_rate": 2.7804612969448135e-05, "loss": 0.6714, "step": 3676, "task_loss": 0.25843146443367004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6814650297164917, "epoch": 3.11, "learning_rate": 2.7798575051322306e-05, "loss": 0.6944, "step": 3677, "task_loss": 0.3054063022136688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6725668907165527, "epoch": 3.11, "learning_rate": 2.7792537133196476e-05, "loss": 0.7572, "step": 3678, "task_loss": 1.0969210863113403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5995408296585083, "epoch": 3.11, "learning_rate": 2.7786499215070643e-05, "loss": 0.7603, "step": 3679, "task_loss": 1.122049331665039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8708053827285767, "epoch": 3.11, "learning_rate": 2.7780461296944814e-05, "loss": 0.699, "step": 3680, "task_loss": 1.114810585975647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35624855756759644, "epoch": 3.11, "learning_rate": 2.7774423378818988e-05, "loss": 0.495, "step": 3681, "task_loss": 0.22323492169380188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5000277757644653, "epoch": 3.11, "learning_rate": 2.7768385460693152e-05, "loss": 0.6666, "step": 3682, "task_loss": 0.691615104675293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8480166792869568, "epoch": 3.11, "learning_rate": 2.7762347542567326e-05, "loss": 0.921, "step": 3683, "task_loss": 1.2991292476654053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5013954639434814, "epoch": 3.11, "learning_rate": 2.7756309624441496e-05, "loss": 0.5126, "step": 3684, "task_loss": 0.31024083495140076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3420479893684387, "epoch": 3.11, "learning_rate": 2.775027170631566e-05, "loss": 0.5629, "step": 3685, "task_loss": 0.7034569978713989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6461314558982849, "epoch": 3.12, "learning_rate": 2.7744233788189834e-05, "loss": 0.683, "step": 3686, "task_loss": 0.5709354877471924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.824910581111908, "epoch": 3.12, "learning_rate": 2.7738195870064005e-05, "loss": 0.6299, "step": 3687, "task_loss": 0.38113948702812195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6973512172698975, "epoch": 3.12, "learning_rate": 2.7732157951938175e-05, "loss": 0.5573, "step": 3688, "task_loss": 0.15177257359027863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.992124080657959, "epoch": 3.12, "learning_rate": 2.7726120033812342e-05, "loss": 0.7601, "step": 3689, "task_loss": 0.9330900311470032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.399638295173645, "epoch": 3.12, "learning_rate": 2.7720082115686513e-05, "loss": 0.6703, "step": 3690, "task_loss": 1.2728360891342163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5215007662773132, "epoch": 3.12, "learning_rate": 2.7714044197560684e-05, "loss": 0.5585, "step": 3691, "task_loss": 0.5350586175918579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5828335881233215, "epoch": 3.12, "learning_rate": 2.770800627943485e-05, "loss": 0.3995, "step": 3692, "task_loss": 0.594528317451477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8291279077529907, "epoch": 3.12, "learning_rate": 2.770196836130902e-05, "loss": 0.6702, "step": 3693, "task_loss": 0.9862347841262817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23790481686592102, "epoch": 3.12, "learning_rate": 2.7695930443183192e-05, "loss": 0.5624, "step": 3694, "task_loss": 0.03650989755988121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5780038237571716, "epoch": 3.12, "learning_rate": 2.768989252505736e-05, "loss": 0.6121, "step": 3695, "task_loss": 1.1103105545043945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9621471166610718, "epoch": 3.12, "learning_rate": 2.768385460693153e-05, "loss": 0.75, "step": 3696, "task_loss": 1.7560906410217285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9815957546234131, "epoch": 3.13, "learning_rate": 2.7677816688805704e-05, "loss": 0.7821, "step": 3697, "task_loss": 2.156797170639038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.955795407295227, "epoch": 3.13, "learning_rate": 2.7671778770679874e-05, "loss": 0.7939, "step": 3698, "task_loss": 1.3579325675964355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5660983324050903, "epoch": 3.13, "learning_rate": 2.7665740852554038e-05, "loss": 0.6988, "step": 3699, "task_loss": 0.5261045098304749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48553627729415894, "epoch": 3.13, "learning_rate": 2.7659702934428212e-05, "loss": 0.7183, "step": 3700, "task_loss": 0.7135216593742371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7787368297576904, "epoch": 3.13, "learning_rate": 2.7653665016302382e-05, "loss": 0.6165, "step": 3701, "task_loss": 0.4789673686027527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3822312355041504, "epoch": 3.13, "learning_rate": 2.764762709817655e-05, "loss": 0.6976, "step": 3702, "task_loss": 0.3869742751121521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3307849168777466, "epoch": 3.13, "learning_rate": 2.764158918005072e-05, "loss": 0.7157, "step": 3703, "task_loss": 0.2516452670097351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5310413241386414, "epoch": 3.13, "learning_rate": 2.763555126192489e-05, "loss": 0.6498, "step": 3704, "task_loss": 0.5294978022575378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8055798411369324, "epoch": 3.13, "learning_rate": 2.7629513343799058e-05, "loss": 0.7803, "step": 3705, "task_loss": 1.8335318565368652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.962742269039154, "epoch": 3.13, "learning_rate": 2.762347542567323e-05, "loss": 0.7151, "step": 3706, "task_loss": 1.055146336555481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7460930943489075, "epoch": 3.13, "learning_rate": 2.76174375075474e-05, "loss": 0.7352, "step": 3707, "task_loss": 0.8432438373565674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.502670407295227, "epoch": 3.13, "learning_rate": 2.7611399589421566e-05, "loss": 0.5972, "step": 3708, "task_loss": 0.1334424614906311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1671204566955566, "epoch": 3.14, "learning_rate": 2.7605361671295737e-05, "loss": 0.917, "step": 3709, "task_loss": 1.006469488143921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4803660809993744, "epoch": 3.14, "learning_rate": 2.7599323753169907e-05, "loss": 0.62, "step": 3710, "task_loss": 0.9983674883842468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7216548919677734, "epoch": 3.14, "learning_rate": 2.759328583504408e-05, "loss": 0.6371, "step": 3711, "task_loss": 2.0298728942871094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41495662927627563, "epoch": 3.14, "learning_rate": 2.7587247916918245e-05, "loss": 0.6291, "step": 3712, "task_loss": 0.4697630703449249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.446172833442688, "epoch": 3.14, "learning_rate": 2.758120999879242e-05, "loss": 0.6026, "step": 3713, "task_loss": 0.8716745972633362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7951304912567139, "epoch": 3.14, "learning_rate": 2.757517208066659e-05, "loss": 0.5652, "step": 3714, "task_loss": 0.303600937128067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5242997407913208, "epoch": 3.14, "learning_rate": 2.7569134162540754e-05, "loss": 0.6211, "step": 3715, "task_loss": 0.19346529245376587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8720619678497314, "epoch": 3.14, "learning_rate": 2.7563096244414927e-05, "loss": 0.6253, "step": 3716, "task_loss": 0.37648487091064453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7255346179008484, "epoch": 3.14, "learning_rate": 2.7557058326289098e-05, "loss": 0.7578, "step": 3717, "task_loss": 1.1927930116653442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2192246913909912, "epoch": 3.14, "learning_rate": 2.7551020408163265e-05, "loss": 0.7528, "step": 3718, "task_loss": 1.312962293624878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39314278960227966, "epoch": 3.14, "learning_rate": 2.7544982490037436e-05, "loss": 0.5693, "step": 3719, "task_loss": 0.6926872134208679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.443636417388916, "epoch": 3.14, "learning_rate": 2.7538944571911606e-05, "loss": 0.525, "step": 3720, "task_loss": 0.49242866039276123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7972447276115417, "epoch": 3.15, "learning_rate": 2.7532906653785777e-05, "loss": 0.6394, "step": 3721, "task_loss": 1.440778374671936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5316053628921509, "epoch": 3.15, "learning_rate": 2.7526868735659944e-05, "loss": 0.7658, "step": 3722, "task_loss": 1.345367670059204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8069190382957458, "epoch": 3.15, "learning_rate": 2.7520830817534115e-05, "loss": 0.5788, "step": 3723, "task_loss": 0.955371618270874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3065193891525269, "epoch": 3.15, "learning_rate": 2.751479289940829e-05, "loss": 0.8439, "step": 3724, "task_loss": 0.4560190737247467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6988447308540344, "epoch": 3.15, "learning_rate": 2.7508754981282452e-05, "loss": 0.7659, "step": 3725, "task_loss": 0.4240686297416687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8805952072143555, "epoch": 3.15, "learning_rate": 2.7502717063156623e-05, "loss": 0.7504, "step": 3726, "task_loss": 1.2965900897979736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49975308775901794, "epoch": 3.15, "learning_rate": 2.7496679145030797e-05, "loss": 0.5595, "step": 3727, "task_loss": 0.5324035286903381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6466822028160095, "epoch": 3.15, "learning_rate": 2.749064122690496e-05, "loss": 0.7106, "step": 3728, "task_loss": 0.5854632258415222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40485233068466187, "epoch": 3.15, "learning_rate": 2.7484603308779135e-05, "loss": 0.583, "step": 3729, "task_loss": 0.2413056492805481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.354900598526001, "epoch": 3.15, "learning_rate": 2.7478565390653305e-05, "loss": 0.5161, "step": 3730, "task_loss": 0.8037174344062805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7486984729766846, "epoch": 3.15, "learning_rate": 2.7472527472527476e-05, "loss": 0.6241, "step": 3731, "task_loss": 0.6723860502243042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4793328642845154, "epoch": 3.15, "learning_rate": 2.7466489554401643e-05, "loss": 0.683, "step": 3732, "task_loss": 0.4852195978164673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1061885356903076, "epoch": 3.16, "learning_rate": 2.7460451636275814e-05, "loss": 0.7154, "step": 3733, "task_loss": 0.9783468246459961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42131149768829346, "epoch": 3.16, "learning_rate": 2.7454413718149984e-05, "loss": 0.5399, "step": 3734, "task_loss": 0.7833398580551147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7367215156555176, "epoch": 3.16, "learning_rate": 2.744837580002415e-05, "loss": 0.5475, "step": 3735, "task_loss": 1.0372810363769531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.408748060464859, "epoch": 3.16, "learning_rate": 2.7442337881898322e-05, "loss": 0.5638, "step": 3736, "task_loss": 1.0570927858352661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6918579339981079, "epoch": 3.16, "learning_rate": 2.7436299963772493e-05, "loss": 0.6966, "step": 3737, "task_loss": 0.40757429599761963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7135032415390015, "epoch": 3.16, "learning_rate": 2.743026204564666e-05, "loss": 0.6963, "step": 3738, "task_loss": 1.2679206132888794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27400535345077515, "epoch": 3.16, "learning_rate": 2.742422412752083e-05, "loss": 0.6275, "step": 3739, "task_loss": 0.19586075842380524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4530479609966278, "epoch": 3.16, "learning_rate": 2.7418186209395004e-05, "loss": 0.6745, "step": 3740, "task_loss": 1.4297794103622437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7493981122970581, "epoch": 3.16, "learning_rate": 2.7412148291269175e-05, "loss": 0.6999, "step": 3741, "task_loss": 0.7350195646286011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7677335739135742, "epoch": 3.16, "learning_rate": 2.740611037314334e-05, "loss": 0.8136, "step": 3742, "task_loss": 1.293740153312683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47273939847946167, "epoch": 3.16, "learning_rate": 2.7400072455017513e-05, "loss": 0.6162, "step": 3743, "task_loss": 0.953671395778656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6389205455780029, "epoch": 3.16, "learning_rate": 2.7394034536891683e-05, "loss": 0.6417, "step": 3744, "task_loss": 1.13491690158844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0024096965789795, "epoch": 3.17, "learning_rate": 2.738799661876585e-05, "loss": 0.7572, "step": 3745, "task_loss": 0.8565533757209778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6544901132583618, "epoch": 3.17, "learning_rate": 2.738195870064002e-05, "loss": 0.7463, "step": 3746, "task_loss": 0.6628578305244446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8597802519798279, "epoch": 3.17, "learning_rate": 2.737592078251419e-05, "loss": 0.7201, "step": 3747, "task_loss": 0.8674101829528809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36789387464523315, "epoch": 3.17, "learning_rate": 2.736988286438836e-05, "loss": 0.6084, "step": 3748, "task_loss": 1.1345446109771729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7231370210647583, "epoch": 3.17, "learning_rate": 2.736384494626253e-05, "loss": 0.8606, "step": 3749, "task_loss": 0.6843928098678589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5862494111061096, "epoch": 3.17, "learning_rate": 2.73578070281367e-05, "loss": 0.5978, "step": 3750, "task_loss": 0.4334567189216614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38852691650390625, "epoch": 3.17, "learning_rate": 2.735176911001087e-05, "loss": 0.5267, "step": 3751, "task_loss": 0.3833361566066742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36267077922821045, "epoch": 3.17, "learning_rate": 2.7345731191885038e-05, "loss": 0.5862, "step": 3752, "task_loss": 0.17595958709716797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47033047676086426, "epoch": 3.17, "learning_rate": 2.7339693273759208e-05, "loss": 0.6806, "step": 3753, "task_loss": 0.7861241102218628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3805367350578308, "epoch": 3.17, "learning_rate": 2.7333655355633382e-05, "loss": 0.6022, "step": 3754, "task_loss": 1.2618480920791626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5910431146621704, "epoch": 3.17, "learning_rate": 2.7327617437507546e-05, "loss": 0.6079, "step": 3755, "task_loss": 1.083288311958313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3335961699485779, "epoch": 3.17, "learning_rate": 2.7321579519381716e-05, "loss": 0.4733, "step": 3756, "task_loss": 0.08060088753700256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6208626627922058, "epoch": 3.18, "learning_rate": 2.731554160125589e-05, "loss": 0.7472, "step": 3757, "task_loss": 0.7524503469467163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.548250675201416, "epoch": 3.18, "learning_rate": 2.7309503683130054e-05, "loss": 0.4952, "step": 3758, "task_loss": 0.8591022491455078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.558382511138916, "epoch": 3.18, "learning_rate": 2.7303465765004228e-05, "loss": 0.6555, "step": 3759, "task_loss": 1.2326316833496094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0760020017623901, "epoch": 3.18, "learning_rate": 2.72974278468784e-05, "loss": 0.7811, "step": 3760, "task_loss": 0.47576460242271423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6894491910934448, "epoch": 3.18, "learning_rate": 2.729138992875257e-05, "loss": 0.7841, "step": 3761, "task_loss": 0.5184053182601929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49788907170295715, "epoch": 3.18, "learning_rate": 2.7285352010626736e-05, "loss": 0.5524, "step": 3762, "task_loss": 0.5793148875236511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6055487394332886, "epoch": 3.18, "learning_rate": 2.7279314092500907e-05, "loss": 0.5082, "step": 3763, "task_loss": 0.40051424503326416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5558630228042603, "epoch": 3.18, "learning_rate": 2.7273276174375078e-05, "loss": 0.6046, "step": 3764, "task_loss": 1.1657373905181885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4367152154445648, "epoch": 3.18, "learning_rate": 2.7267238256249245e-05, "loss": 0.7325, "step": 3765, "task_loss": 0.3441525995731354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6355109214782715, "epoch": 3.18, "learning_rate": 2.7261200338123415e-05, "loss": 0.6441, "step": 3766, "task_loss": 0.945931077003479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.860523521900177, "epoch": 3.18, "learning_rate": 2.7255162419997586e-05, "loss": 0.6012, "step": 3767, "task_loss": 1.1297026872634888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7198249697685242, "epoch": 3.19, "learning_rate": 2.7249124501871753e-05, "loss": 0.5669, "step": 3768, "task_loss": 1.3856337070465088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9888213872909546, "epoch": 3.19, "learning_rate": 2.7243086583745924e-05, "loss": 0.721, "step": 3769, "task_loss": 1.0374499559402466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5991783738136292, "epoch": 3.19, "learning_rate": 2.7237048665620098e-05, "loss": 0.7023, "step": 3770, "task_loss": 0.48877325654029846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9571166038513184, "epoch": 3.19, "learning_rate": 2.7231010747494268e-05, "loss": 0.8814, "step": 3771, "task_loss": 1.5043777227401733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36226242780685425, "epoch": 3.19, "learning_rate": 2.7224972829368432e-05, "loss": 0.5564, "step": 3772, "task_loss": 0.6930634379386902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5540496706962585, "epoch": 3.19, "learning_rate": 2.7218934911242606e-05, "loss": 0.6657, "step": 3773, "task_loss": 1.5520204305648804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3817112147808075, "epoch": 3.19, "learning_rate": 2.7212896993116777e-05, "loss": 0.5759, "step": 3774, "task_loss": 0.9813417196273804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.816502571105957, "epoch": 3.19, "learning_rate": 2.7206859074990944e-05, "loss": 0.8392, "step": 3775, "task_loss": 1.1619873046875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3911612629890442, "epoch": 3.19, "learning_rate": 2.7200821156865114e-05, "loss": 0.6507, "step": 3776, "task_loss": 0.624571681022644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.564736545085907, "epoch": 3.19, "learning_rate": 2.7194783238739285e-05, "loss": 0.5966, "step": 3777, "task_loss": 0.5943394899368286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7511274814605713, "epoch": 3.19, "learning_rate": 2.7188745320613452e-05, "loss": 0.6949, "step": 3778, "task_loss": 0.5647209882736206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4479868710041046, "epoch": 3.19, "learning_rate": 2.7182707402487623e-05, "loss": 0.5585, "step": 3779, "task_loss": 0.3722969591617584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5413883924484253, "epoch": 3.2, "learning_rate": 2.7176669484361793e-05, "loss": 0.6911, "step": 3780, "task_loss": 1.143428087234497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5554563999176025, "epoch": 3.2, "learning_rate": 2.7170631566235967e-05, "loss": 0.6162, "step": 3781, "task_loss": 0.4169100224971771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8574235439300537, "epoch": 3.2, "learning_rate": 2.716459364811013e-05, "loss": 0.7371, "step": 3782, "task_loss": 1.971543788909912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7909473180770874, "epoch": 3.2, "learning_rate": 2.71585557299843e-05, "loss": 0.7167, "step": 3783, "task_loss": 0.8002767562866211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5420628190040588, "epoch": 3.2, "learning_rate": 2.7152517811858476e-05, "loss": 0.6198, "step": 3784, "task_loss": 0.4915619492530823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9211279153823853, "epoch": 3.2, "learning_rate": 2.714647989373264e-05, "loss": 0.5692, "step": 3785, "task_loss": 1.0802326202392578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5529296398162842, "epoch": 3.2, "learning_rate": 2.7140441975606813e-05, "loss": 0.5217, "step": 3786, "task_loss": 0.28511202335357666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4289717972278595, "epoch": 3.2, "learning_rate": 2.7134404057480984e-05, "loss": 0.4296, "step": 3787, "task_loss": 0.6858624219894409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.385250449180603, "epoch": 3.2, "learning_rate": 2.7128366139355148e-05, "loss": 0.6946, "step": 3788, "task_loss": 0.7283570766448975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5088664293289185, "epoch": 3.2, "learning_rate": 2.712232822122932e-05, "loss": 0.6174, "step": 3789, "task_loss": 0.7653895020484924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0906836986541748, "epoch": 3.2, "learning_rate": 2.7116290303103492e-05, "loss": 0.7458, "step": 3790, "task_loss": 0.7550229430198669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5201867818832397, "epoch": 3.2, "learning_rate": 2.7110252384977663e-05, "loss": 0.5505, "step": 3791, "task_loss": 0.5186569690704346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5407899022102356, "epoch": 3.21, "learning_rate": 2.710421446685183e-05, "loss": 0.5613, "step": 3792, "task_loss": 0.4091385304927826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7789901494979858, "epoch": 3.21, "learning_rate": 2.7098176548726e-05, "loss": 0.6923, "step": 3793, "task_loss": 1.0979026556015015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.549785852432251, "epoch": 3.21, "learning_rate": 2.709213863060017e-05, "loss": 0.6122, "step": 3794, "task_loss": 0.22662149369716644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5956298112869263, "epoch": 3.21, "learning_rate": 2.7086100712474338e-05, "loss": 0.5924, "step": 3795, "task_loss": 1.675180435180664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4998801052570343, "epoch": 3.21, "learning_rate": 2.708006279434851e-05, "loss": 0.5475, "step": 3796, "task_loss": 0.9572819471359253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6800335645675659, "epoch": 3.21, "learning_rate": 2.7074024876222683e-05, "loss": 0.591, "step": 3797, "task_loss": 0.7212218046188354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27396222949028015, "epoch": 3.21, "learning_rate": 2.7067986958096847e-05, "loss": 0.5571, "step": 3798, "task_loss": 0.26156508922576904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7810419797897339, "epoch": 3.21, "learning_rate": 2.7061949039971017e-05, "loss": 0.8228, "step": 3799, "task_loss": 0.45833438634872437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7736960649490356, "epoch": 3.21, "learning_rate": 2.705591112184519e-05, "loss": 0.6531, "step": 3800, "task_loss": 0.587170422077179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6723458766937256, "epoch": 3.21, "learning_rate": 2.704987320371936e-05, "loss": 0.7676, "step": 3801, "task_loss": 0.5839070677757263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4362712502479553, "epoch": 3.21, "learning_rate": 2.704383528559353e-05, "loss": 0.7637, "step": 3802, "task_loss": 0.647459864616394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5533900260925293, "epoch": 3.21, "learning_rate": 2.70377973674677e-05, "loss": 0.6052, "step": 3803, "task_loss": 0.6986656188964844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4195314347743988, "epoch": 3.22, "learning_rate": 2.703175944934187e-05, "loss": 0.7278, "step": 3804, "task_loss": 1.187132716178894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.763897180557251, "epoch": 3.22, "learning_rate": 2.7025721531216037e-05, "loss": 0.7961, "step": 3805, "task_loss": 0.42536187171936035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5210390686988831, "epoch": 3.22, "learning_rate": 2.7019683613090208e-05, "loss": 0.5256, "step": 3806, "task_loss": 0.4214737117290497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2576864957809448, "epoch": 3.22, "learning_rate": 2.701364569496438e-05, "loss": 0.5541, "step": 3807, "task_loss": 0.6714580655097961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4546336233615875, "epoch": 3.22, "learning_rate": 2.7007607776838545e-05, "loss": 0.6218, "step": 3808, "task_loss": 0.6319274306297302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.040923833847046, "epoch": 3.22, "learning_rate": 2.7001569858712716e-05, "loss": 0.9007, "step": 3809, "task_loss": 0.5754467248916626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5205569863319397, "epoch": 3.22, "learning_rate": 2.6995531940586887e-05, "loss": 0.5994, "step": 3810, "task_loss": 0.7320914268493652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7630859613418579, "epoch": 3.22, "learning_rate": 2.698949402246106e-05, "loss": 0.5938, "step": 3811, "task_loss": 1.2972216606140137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27662238478660583, "epoch": 3.22, "learning_rate": 2.6983456104335224e-05, "loss": 0.5458, "step": 3812, "task_loss": 0.24180002510547638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4288104772567749, "epoch": 3.22, "learning_rate": 2.6977418186209395e-05, "loss": 0.5961, "step": 3813, "task_loss": 0.29090648889541626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1962366104125977, "epoch": 3.22, "learning_rate": 2.697138026808357e-05, "loss": 0.6706, "step": 3814, "task_loss": 1.3018921613693237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6922852396965027, "epoch": 3.22, "learning_rate": 2.6965342349957733e-05, "loss": 0.7684, "step": 3815, "task_loss": 1.7663601636886597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8151668310165405, "epoch": 3.23, "learning_rate": 2.6959304431831907e-05, "loss": 0.9317, "step": 3816, "task_loss": 1.3193262815475464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5841848254203796, "epoch": 3.23, "learning_rate": 2.6953266513706077e-05, "loss": 0.5518, "step": 3817, "task_loss": 0.8994879126548767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6144908666610718, "epoch": 3.23, "learning_rate": 2.6947228595580244e-05, "loss": 0.7568, "step": 3818, "task_loss": 0.9236342906951904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7763292789459229, "epoch": 3.23, "learning_rate": 2.6941190677454415e-05, "loss": 0.5247, "step": 3819, "task_loss": 0.7072175145149231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7275692224502563, "epoch": 3.23, "learning_rate": 2.6935152759328586e-05, "loss": 0.6969, "step": 3820, "task_loss": 1.620560884475708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8345743417739868, "epoch": 3.23, "learning_rate": 2.6929114841202756e-05, "loss": 0.7907, "step": 3821, "task_loss": 1.094906210899353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8300758600234985, "epoch": 3.23, "learning_rate": 2.6923076923076923e-05, "loss": 0.7788, "step": 3822, "task_loss": 1.9478230476379395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5707115530967712, "epoch": 3.23, "learning_rate": 2.6917039004951094e-05, "loss": 0.618, "step": 3823, "task_loss": 0.5562289953231812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42580896615982056, "epoch": 3.23, "learning_rate": 2.6911001086825264e-05, "loss": 0.7177, "step": 3824, "task_loss": 0.5858167409896851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2559913992881775, "epoch": 3.23, "learning_rate": 2.690496316869943e-05, "loss": 0.6197, "step": 3825, "task_loss": 0.1804138869047165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42227593064308167, "epoch": 3.23, "learning_rate": 2.6898925250573602e-05, "loss": 0.54, "step": 3826, "task_loss": 0.5198736190795898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6105734705924988, "epoch": 3.23, "learning_rate": 2.6892887332447776e-05, "loss": 0.6633, "step": 3827, "task_loss": 0.24023839831352234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7365256547927856, "epoch": 3.24, "learning_rate": 2.688684941432194e-05, "loss": 0.643, "step": 3828, "task_loss": 0.6190977096557617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5054594278335571, "epoch": 3.24, "learning_rate": 2.688081149619611e-05, "loss": 0.5725, "step": 3829, "task_loss": 0.46155068278312683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4077319800853729, "epoch": 3.24, "learning_rate": 2.6874773578070285e-05, "loss": 0.5678, "step": 3830, "task_loss": 0.4196246862411499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3718378245830536, "epoch": 3.24, "learning_rate": 2.6868735659944455e-05, "loss": 0.5403, "step": 3831, "task_loss": 0.5666140913963318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9335667490959167, "epoch": 3.24, "learning_rate": 2.6862697741818622e-05, "loss": 0.6919, "step": 3832, "task_loss": 1.687600016593933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9736859798431396, "epoch": 3.24, "learning_rate": 2.6856659823692793e-05, "loss": 0.5869, "step": 3833, "task_loss": 0.5775043368339539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4921509027481079, "epoch": 3.24, "learning_rate": 2.6850621905566963e-05, "loss": 0.6274, "step": 3834, "task_loss": 1.1227810382843018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5715256929397583, "epoch": 3.24, "learning_rate": 2.684458398744113e-05, "loss": 0.7426, "step": 3835, "task_loss": 1.2261786460876465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8505398035049438, "epoch": 3.24, "learning_rate": 2.68385460693153e-05, "loss": 0.655, "step": 3836, "task_loss": 1.1689717769622803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42035973072052, "epoch": 3.24, "learning_rate": 2.6832508151189472e-05, "loss": 0.5447, "step": 3837, "task_loss": 0.6019532680511475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5325599908828735, "epoch": 3.24, "learning_rate": 2.682647023306364e-05, "loss": 0.5946, "step": 3838, "task_loss": 0.3388572335243225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2684626877307892, "epoch": 3.24, "learning_rate": 2.682043231493781e-05, "loss": 0.706, "step": 3839, "task_loss": 0.2648860216140747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39670324325561523, "epoch": 3.25, "learning_rate": 2.681439439681198e-05, "loss": 0.4397, "step": 3840, "task_loss": 0.7919542789459229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8736084699630737, "epoch": 3.25, "learning_rate": 2.6808356478686154e-05, "loss": 0.6537, "step": 3841, "task_loss": 1.3946781158447266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6007811427116394, "epoch": 3.25, "learning_rate": 2.6802318560560318e-05, "loss": 0.4864, "step": 3842, "task_loss": 0.7827214598655701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.9908289909362793, "epoch": 3.25, "learning_rate": 2.6796280642434492e-05, "loss": 0.9942, "step": 3843, "task_loss": 1.1989620923995972 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5278843641281128, "epoch": 3.25, "learning_rate": 2.6790242724308662e-05, "loss": 0.6177, "step": 3844, "task_loss": 0.8241632580757141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5054795742034912, "epoch": 3.25, "learning_rate": 2.6784204806182826e-05, "loss": 0.6058, "step": 3845, "task_loss": 0.39002248644828796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3030523657798767, "epoch": 3.25, "learning_rate": 2.6778166888057e-05, "loss": 0.4318, "step": 3846, "task_loss": 0.26502785086631775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4107990264892578, "epoch": 3.25, "learning_rate": 2.677212896993117e-05, "loss": 1.0368, "step": 3847, "task_loss": 1.201445460319519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.507205605506897, "epoch": 3.25, "learning_rate": 2.6766091051805338e-05, "loss": 0.6553, "step": 3848, "task_loss": 1.0373141765594482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5042811632156372, "epoch": 3.25, "learning_rate": 2.676005313367951e-05, "loss": 0.8051, "step": 3849, "task_loss": 1.494383454322815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24265123903751373, "epoch": 3.25, "learning_rate": 2.675401521555368e-05, "loss": 0.4407, "step": 3850, "task_loss": 0.044261109083890915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8958820104598999, "epoch": 3.26, "learning_rate": 2.674797729742785e-05, "loss": 0.584, "step": 3851, "task_loss": 0.5287253856658936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6337130665779114, "epoch": 3.26, "learning_rate": 2.6741939379302017e-05, "loss": 0.7723, "step": 3852, "task_loss": 0.7469601035118103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7162208557128906, "epoch": 3.26, "learning_rate": 2.6735901461176187e-05, "loss": 0.5828, "step": 3853, "task_loss": 0.330293744802475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6202374696731567, "epoch": 3.26, "learning_rate": 2.672986354305036e-05, "loss": 0.903, "step": 3854, "task_loss": 1.0988820791244507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1776304244995117, "epoch": 3.26, "learning_rate": 2.6723825624924525e-05, "loss": 0.8453, "step": 3855, "task_loss": 1.2135603427886963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4645193815231323, "epoch": 3.26, "learning_rate": 2.6717787706798696e-05, "loss": 0.4957, "step": 3856, "task_loss": 1.6712590456008911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5928220748901367, "epoch": 3.26, "learning_rate": 2.671174978867287e-05, "loss": 0.4815, "step": 3857, "task_loss": 0.3807794451713562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.701501190662384, "epoch": 3.26, "learning_rate": 2.6705711870547033e-05, "loss": 0.6177, "step": 3858, "task_loss": 1.458693265914917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9499773979187012, "epoch": 3.26, "learning_rate": 2.6699673952421207e-05, "loss": 0.6705, "step": 3859, "task_loss": 0.662256121635437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37579917907714844, "epoch": 3.26, "learning_rate": 2.6693636034295378e-05, "loss": 0.5736, "step": 3860, "task_loss": 0.6692442297935486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3625980615615845, "epoch": 3.26, "learning_rate": 2.668759811616955e-05, "loss": 0.7529, "step": 3861, "task_loss": 1.7739009857177734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36655858159065247, "epoch": 3.26, "learning_rate": 2.6681560198043716e-05, "loss": 0.5743, "step": 3862, "task_loss": 0.3933911919593811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48964786529541016, "epoch": 3.27, "learning_rate": 2.6675522279917886e-05, "loss": 0.5849, "step": 3863, "task_loss": 0.4818389117717743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3185902237892151, "epoch": 3.27, "learning_rate": 2.6669484361792057e-05, "loss": 0.4199, "step": 3864, "task_loss": 0.4691984951496124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4040016531944275, "epoch": 3.27, "learning_rate": 2.6663446443666224e-05, "loss": 0.5411, "step": 3865, "task_loss": 0.5929793119430542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4796018600463867, "epoch": 3.27, "learning_rate": 2.6657408525540395e-05, "loss": 0.6123, "step": 3866, "task_loss": 0.5219969749450684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4811915457248688, "epoch": 3.27, "learning_rate": 2.6651370607414565e-05, "loss": 0.6456, "step": 3867, "task_loss": 0.44106876850128174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37166595458984375, "epoch": 3.27, "learning_rate": 2.6645332689288732e-05, "loss": 0.4433, "step": 3868, "task_loss": 0.21932142972946167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.088750958442688, "epoch": 3.27, "learning_rate": 2.6639294771162903e-05, "loss": 0.7231, "step": 3869, "task_loss": 1.2116187810897827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6861915588378906, "epoch": 3.27, "learning_rate": 2.6633256853037077e-05, "loss": 0.8649, "step": 3870, "task_loss": 0.9740768671035767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2734610438346863, "epoch": 3.27, "learning_rate": 2.6627218934911247e-05, "loss": 0.5702, "step": 3871, "task_loss": 1.1948003768920898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7619774341583252, "epoch": 3.27, "learning_rate": 2.662118101678541e-05, "loss": 0.7651, "step": 3872, "task_loss": 0.7293266654014587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.967250406742096, "epoch": 3.27, "learning_rate": 2.6615143098659585e-05, "loss": 0.7331, "step": 3873, "task_loss": 0.7539423108100891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39383089542388916, "epoch": 3.27, "learning_rate": 2.6609105180533756e-05, "loss": 0.4121, "step": 3874, "task_loss": 0.31008198857307434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3484002351760864, "epoch": 3.28, "learning_rate": 2.6603067262407923e-05, "loss": 0.8422, "step": 3875, "task_loss": 0.6652863025665283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6814296245574951, "epoch": 3.28, "learning_rate": 2.6597029344282094e-05, "loss": 0.629, "step": 3876, "task_loss": 0.7861500978469849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5805301070213318, "epoch": 3.28, "learning_rate": 2.6590991426156264e-05, "loss": 0.7727, "step": 3877, "task_loss": 0.4359031617641449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6087177991867065, "epoch": 3.28, "learning_rate": 2.658495350803043e-05, "loss": 0.5661, "step": 3878, "task_loss": 1.3420968055725098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8411298394203186, "epoch": 3.28, "learning_rate": 2.6578915589904602e-05, "loss": 0.5906, "step": 3879, "task_loss": 0.3424343168735504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8254624009132385, "epoch": 3.28, "learning_rate": 2.6572877671778772e-05, "loss": 0.743, "step": 3880, "task_loss": 2.4015679359436035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3090619146823883, "epoch": 3.28, "learning_rate": 2.656683975365294e-05, "loss": 0.5331, "step": 3881, "task_loss": 0.7764829993247986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5496142506599426, "epoch": 3.28, "learning_rate": 2.656080183552711e-05, "loss": 0.7116, "step": 3882, "task_loss": 0.4755264222621918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7677972316741943, "epoch": 3.28, "learning_rate": 2.655476391740128e-05, "loss": 0.6032, "step": 3883, "task_loss": 0.805168092250824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5757544040679932, "epoch": 3.28, "learning_rate": 2.6548725999275455e-05, "loss": 0.7057, "step": 3884, "task_loss": 1.3406319618225098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.507298469543457, "epoch": 3.28, "learning_rate": 2.654268808114962e-05, "loss": 0.6396, "step": 3885, "task_loss": 0.09997845441102982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9478985667228699, "epoch": 3.28, "learning_rate": 2.653665016302379e-05, "loss": 0.8329, "step": 3886, "task_loss": 0.9102082848548889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6417235136032104, "epoch": 3.29, "learning_rate": 2.6530612244897963e-05, "loss": 0.6198, "step": 3887, "task_loss": 0.5683102011680603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.791704535484314, "epoch": 3.29, "learning_rate": 2.6524574326772127e-05, "loss": 0.5936, "step": 3888, "task_loss": 1.0623878240585327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6217017769813538, "epoch": 3.29, "learning_rate": 2.65185364086463e-05, "loss": 0.6718, "step": 3889, "task_loss": 1.547390341758728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5323266983032227, "epoch": 3.29, "learning_rate": 2.651249849052047e-05, "loss": 0.6094, "step": 3890, "task_loss": 0.9643723964691162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4152616858482361, "epoch": 3.29, "learning_rate": 2.650646057239464e-05, "loss": 0.5605, "step": 3891, "task_loss": 0.43540284037590027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4223644733428955, "epoch": 3.29, "learning_rate": 2.650042265426881e-05, "loss": 0.485, "step": 3892, "task_loss": 0.5213735699653625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4909425377845764, "epoch": 3.29, "learning_rate": 2.649438473614298e-05, "loss": 0.5734, "step": 3893, "task_loss": 1.2381582260131836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41863974928855896, "epoch": 3.29, "learning_rate": 2.648834681801715e-05, "loss": 0.5931, "step": 3894, "task_loss": 0.4961846172809601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4648692011833191, "epoch": 3.29, "learning_rate": 2.6482308899891317e-05, "loss": 0.5361, "step": 3895, "task_loss": 0.6191472411155701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28473180532455444, "epoch": 3.29, "learning_rate": 2.6476270981765488e-05, "loss": 0.5372, "step": 3896, "task_loss": 0.705217182636261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.065242886543274, "epoch": 3.29, "learning_rate": 2.647023306363966e-05, "loss": 0.8353, "step": 3897, "task_loss": 1.787498950958252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31734955310821533, "epoch": 3.29, "learning_rate": 2.6464195145513826e-05, "loss": 0.5276, "step": 3898, "task_loss": 0.9646798968315125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4767318665981293, "epoch": 3.3, "learning_rate": 2.6458157227387996e-05, "loss": 0.7424, "step": 3899, "task_loss": 1.5132758617401123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6761322021484375, "epoch": 3.3, "learning_rate": 2.645211930926217e-05, "loss": 0.5488, "step": 3900, "task_loss": 1.4400570392608643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4853355884552002, "epoch": 3.3, "learning_rate": 2.6446081391136334e-05, "loss": 0.5332, "step": 3901, "task_loss": 0.6799299716949463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7361021041870117, "epoch": 3.3, "learning_rate": 2.6440043473010505e-05, "loss": 0.6561, "step": 3902, "task_loss": 1.4156526327133179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46797269582748413, "epoch": 3.3, "learning_rate": 2.643400555488468e-05, "loss": 0.6549, "step": 3903, "task_loss": 1.5360777378082275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9150091409683228, "epoch": 3.3, "learning_rate": 2.642796763675885e-05, "loss": 0.8199, "step": 3904, "task_loss": 1.369651198387146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8130684494972229, "epoch": 3.3, "learning_rate": 2.6421929718633016e-05, "loss": 0.6326, "step": 3905, "task_loss": 0.7818535566329956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1520894765853882, "epoch": 3.3, "learning_rate": 2.6415891800507187e-05, "loss": 0.6707, "step": 3906, "task_loss": 1.1094388961791992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8521507382392883, "epoch": 3.3, "learning_rate": 2.6409853882381357e-05, "loss": 0.6868, "step": 3907, "task_loss": 1.0143516063690186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7187873125076294, "epoch": 3.3, "learning_rate": 2.6403815964255525e-05, "loss": 0.7431, "step": 3908, "task_loss": 0.811107873916626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6099079847335815, "epoch": 3.3, "learning_rate": 2.6397778046129695e-05, "loss": 0.689, "step": 3909, "task_loss": 0.9018011689186096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7623339295387268, "epoch": 3.3, "learning_rate": 2.6391740128003866e-05, "loss": 0.7054, "step": 3910, "task_loss": 0.3750118017196655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.243570014834404, "epoch": 3.31, "learning_rate": 2.6385702209878033e-05, "loss": 0.6818, "step": 3911, "task_loss": 0.06974969059228897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.707797646522522, "epoch": 3.31, "learning_rate": 2.6379664291752204e-05, "loss": 0.8053, "step": 3912, "task_loss": 0.5301773548126221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5330283641815186, "epoch": 3.31, "learning_rate": 2.6373626373626374e-05, "loss": 0.6056, "step": 3913, "task_loss": 0.7068347334861755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3029429316520691, "epoch": 3.31, "learning_rate": 2.6367588455500548e-05, "loss": 0.5441, "step": 3914, "task_loss": 0.08207356184720993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5069031715393066, "epoch": 3.31, "learning_rate": 2.6361550537374712e-05, "loss": 0.4646, "step": 3915, "task_loss": 0.44526171684265137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6169978380203247, "epoch": 3.31, "learning_rate": 2.6355512619248886e-05, "loss": 0.5319, "step": 3916, "task_loss": 0.8421952724456787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47404229640960693, "epoch": 3.31, "learning_rate": 2.6349474701123056e-05, "loss": 0.8043, "step": 3917, "task_loss": 0.4813024401664734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4884621202945709, "epoch": 3.31, "learning_rate": 2.634343678299722e-05, "loss": 0.6651, "step": 3918, "task_loss": 0.46151483058929443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3522825241088867, "epoch": 3.31, "learning_rate": 2.6337398864871394e-05, "loss": 0.5168, "step": 3919, "task_loss": 1.1924980878829956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5214714407920837, "epoch": 3.31, "learning_rate": 2.6331360946745565e-05, "loss": 0.5938, "step": 3920, "task_loss": 0.6074795126914978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9342019557952881, "epoch": 3.31, "learning_rate": 2.6325323028619732e-05, "loss": 0.8019, "step": 3921, "task_loss": 0.19990885257720947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5792969465255737, "epoch": 3.32, "learning_rate": 2.6319285110493903e-05, "loss": 0.6307, "step": 3922, "task_loss": 1.1586164236068726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5641287565231323, "epoch": 3.32, "learning_rate": 2.6313247192368073e-05, "loss": 0.6074, "step": 3923, "task_loss": 1.4743331670761108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.674136221408844, "epoch": 3.32, "learning_rate": 2.6307209274242244e-05, "loss": 0.6302, "step": 3924, "task_loss": 1.9525656700134277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33975762128829956, "epoch": 3.32, "learning_rate": 2.630117135611641e-05, "loss": 0.6684, "step": 3925, "task_loss": 0.7077212929725647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5714633464813232, "epoch": 3.32, "learning_rate": 2.629513343799058e-05, "loss": 0.6142, "step": 3926, "task_loss": 0.48205330967903137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4228416085243225, "epoch": 3.32, "learning_rate": 2.6289095519864755e-05, "loss": 0.5963, "step": 3927, "task_loss": 0.2441185861825943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5860397815704346, "epoch": 3.32, "learning_rate": 2.628305760173892e-05, "loss": 0.5328, "step": 3928, "task_loss": 0.4942067861557007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2570850849151611, "epoch": 3.32, "learning_rate": 2.627701968361309e-05, "loss": 0.7698, "step": 3929, "task_loss": 0.8795751333236694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40958184003829956, "epoch": 3.32, "learning_rate": 2.6270981765487264e-05, "loss": 0.5681, "step": 3930, "task_loss": 0.7087894082069397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49124422669410706, "epoch": 3.32, "learning_rate": 2.6264943847361427e-05, "loss": 0.5641, "step": 3931, "task_loss": 0.25018155574798584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5732605457305908, "epoch": 3.32, "learning_rate": 2.62589059292356e-05, "loss": 0.5875, "step": 3932, "task_loss": 0.1319923847913742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5042264461517334, "epoch": 3.32, "learning_rate": 2.6252868011109772e-05, "loss": 0.6762, "step": 3933, "task_loss": 0.7675753831863403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7829902172088623, "epoch": 3.33, "learning_rate": 2.6246830092983943e-05, "loss": 0.5766, "step": 3934, "task_loss": 0.9722535014152527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5832582712173462, "epoch": 3.33, "learning_rate": 2.624079217485811e-05, "loss": 0.6383, "step": 3935, "task_loss": 0.5819710493087769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5074308514595032, "epoch": 3.33, "learning_rate": 2.623475425673228e-05, "loss": 0.7085, "step": 3936, "task_loss": 1.235790491104126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47025662660598755, "epoch": 3.33, "learning_rate": 2.622871633860645e-05, "loss": 0.547, "step": 3937, "task_loss": 0.3245030641555786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5935146808624268, "epoch": 3.33, "learning_rate": 2.6222678420480618e-05, "loss": 0.633, "step": 3938, "task_loss": 0.9082158207893372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5476855039596558, "epoch": 3.33, "learning_rate": 2.621664050235479e-05, "loss": 0.3847, "step": 3939, "task_loss": 0.7572515606880188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6679243445396423, "epoch": 3.33, "learning_rate": 2.621060258422896e-05, "loss": 0.6444, "step": 3940, "task_loss": 0.16115054488182068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36852210760116577, "epoch": 3.33, "learning_rate": 2.6204564666103126e-05, "loss": 0.6391, "step": 3941, "task_loss": 0.834308385848999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8478296399116516, "epoch": 3.33, "learning_rate": 2.6198526747977297e-05, "loss": 0.5716, "step": 3942, "task_loss": 1.5304219722747803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4448738992214203, "epoch": 3.33, "learning_rate": 2.6192488829851468e-05, "loss": 0.519, "step": 3943, "task_loss": 0.16034835577011108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5501660108566284, "epoch": 3.33, "learning_rate": 2.618645091172564e-05, "loss": 0.5141, "step": 3944, "task_loss": 1.425825595855713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2517452538013458, "epoch": 3.33, "learning_rate": 2.6180412993599805e-05, "loss": 0.578, "step": 3945, "task_loss": 0.689324676990509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7069664001464844, "epoch": 3.34, "learning_rate": 2.617437507547398e-05, "loss": 0.624, "step": 3946, "task_loss": 0.7517654299736023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43849098682403564, "epoch": 3.34, "learning_rate": 2.616833715734815e-05, "loss": 0.5039, "step": 3947, "task_loss": 0.15890027582645416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34070831537246704, "epoch": 3.34, "learning_rate": 2.6162299239222317e-05, "loss": 0.5948, "step": 3948, "task_loss": 0.4755551815032959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5503568649291992, "epoch": 3.34, "learning_rate": 2.6156261321096488e-05, "loss": 0.6308, "step": 3949, "task_loss": 0.5704407691955566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39978650212287903, "epoch": 3.34, "learning_rate": 2.6150223402970658e-05, "loss": 0.7811, "step": 3950, "task_loss": 0.5388373732566833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2862473726272583, "epoch": 3.34, "learning_rate": 2.6144185484844825e-05, "loss": 0.3045, "step": 3951, "task_loss": 0.8936640024185181 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9583165049552917, "epoch": 3.34, "learning_rate": 2.6138147566718996e-05, "loss": 0.6627, "step": 3952, "task_loss": 1.2962121963500977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5983402132987976, "epoch": 3.34, "learning_rate": 2.6132109648593166e-05, "loss": 0.6773, "step": 3953, "task_loss": 0.20013566315174103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.671189546585083, "epoch": 3.34, "learning_rate": 2.6126071730467337e-05, "loss": 0.578, "step": 3954, "task_loss": 0.44599011540412903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4950448274612427, "epoch": 3.34, "learning_rate": 2.6120033812341504e-05, "loss": 0.7887, "step": 3955, "task_loss": 1.0941587686538696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5663962364196777, "epoch": 3.34, "learning_rate": 2.6113995894215675e-05, "loss": 0.5225, "step": 3956, "task_loss": 0.7047349810600281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5218271017074585, "epoch": 3.34, "learning_rate": 2.610795797608985e-05, "loss": 0.6904, "step": 3957, "task_loss": 0.5950703620910645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6578766107559204, "epoch": 3.35, "learning_rate": 2.6101920057964013e-05, "loss": 0.7838, "step": 3958, "task_loss": 0.9733827114105225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5907174944877625, "epoch": 3.35, "learning_rate": 2.6095882139838183e-05, "loss": 0.7218, "step": 3959, "task_loss": 0.468867689371109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6422519087791443, "epoch": 3.35, "learning_rate": 2.6089844221712357e-05, "loss": 0.5173, "step": 3960, "task_loss": 0.11491935700178146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5063284635543823, "epoch": 3.35, "learning_rate": 2.608380630358652e-05, "loss": 0.644, "step": 3961, "task_loss": 0.7304311394691467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3663907051086426, "epoch": 3.35, "learning_rate": 2.6077768385460695e-05, "loss": 0.4794, "step": 3962, "task_loss": 0.44228166341781616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4637787938117981, "epoch": 3.35, "learning_rate": 2.6071730467334865e-05, "loss": 0.6119, "step": 3963, "task_loss": 1.0656816959381104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5052222609519958, "epoch": 3.35, "learning_rate": 2.6065692549209036e-05, "loss": 0.653, "step": 3964, "task_loss": 0.8747231364250183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7805315256118774, "epoch": 3.35, "learning_rate": 2.6059654631083203e-05, "loss": 0.5161, "step": 3965, "task_loss": 0.6256901025772095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8468225002288818, "epoch": 3.35, "learning_rate": 2.6053616712957374e-05, "loss": 0.5186, "step": 3966, "task_loss": 0.7385403513908386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8091645240783691, "epoch": 3.35, "learning_rate": 2.6047578794831544e-05, "loss": 0.5935, "step": 3967, "task_loss": 2.0240392684936523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0832350254058838, "epoch": 3.35, "learning_rate": 2.604154087670571e-05, "loss": 0.8204, "step": 3968, "task_loss": 1.5370038747787476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49045324325561523, "epoch": 3.35, "learning_rate": 2.6035502958579882e-05, "loss": 0.5968, "step": 3969, "task_loss": 1.171879768371582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5738171935081482, "epoch": 3.36, "learning_rate": 2.6029465040454053e-05, "loss": 0.6297, "step": 3970, "task_loss": 0.08775167167186737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8335238099098206, "epoch": 3.36, "learning_rate": 2.602342712232822e-05, "loss": 0.7244, "step": 3971, "task_loss": 1.4658639430999756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7119938135147095, "epoch": 3.36, "learning_rate": 2.601738920420239e-05, "loss": 0.6659, "step": 3972, "task_loss": 0.8703680038452148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39360442757606506, "epoch": 3.36, "learning_rate": 2.6011351286076564e-05, "loss": 0.7602, "step": 3973, "task_loss": 0.47015106678009033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6743742227554321, "epoch": 3.36, "learning_rate": 2.6005313367950735e-05, "loss": 0.6371, "step": 3974, "task_loss": 1.0348647832870483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31033754348754883, "epoch": 3.36, "learning_rate": 2.59992754498249e-05, "loss": 0.5168, "step": 3975, "task_loss": 0.9953373670578003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0974111557006836, "epoch": 3.36, "learning_rate": 2.5993237531699073e-05, "loss": 0.7893, "step": 3976, "task_loss": 0.6379475593566895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4914453625679016, "epoch": 3.36, "learning_rate": 2.5987199613573243e-05, "loss": 0.6733, "step": 3977, "task_loss": 0.6056598424911499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42430174350738525, "epoch": 3.36, "learning_rate": 2.598116169544741e-05, "loss": 0.5241, "step": 3978, "task_loss": 0.5424736142158508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5069644451141357, "epoch": 3.36, "learning_rate": 2.597512377732158e-05, "loss": 0.6371, "step": 3979, "task_loss": 0.6619279384613037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.78520667552948, "epoch": 3.36, "learning_rate": 2.596908585919575e-05, "loss": 0.7728, "step": 3980, "task_loss": 0.43812429904937744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.316731333732605, "epoch": 3.36, "learning_rate": 2.596304794106992e-05, "loss": 0.639, "step": 3981, "task_loss": 0.12956060469150543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6818212270736694, "epoch": 3.37, "learning_rate": 2.595701002294409e-05, "loss": 0.5799, "step": 3982, "task_loss": 0.6076228022575378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.65285325050354, "epoch": 3.37, "learning_rate": 2.595097210481826e-05, "loss": 0.6448, "step": 3983, "task_loss": 0.876469075679779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6837882995605469, "epoch": 3.37, "learning_rate": 2.5944934186692434e-05, "loss": 0.6658, "step": 3984, "task_loss": 2.5778963565826416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4796503186225891, "epoch": 3.37, "learning_rate": 2.5938896268566598e-05, "loss": 0.6725, "step": 3985, "task_loss": 0.48694297671318054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46068698167800903, "epoch": 3.37, "learning_rate": 2.5932858350440768e-05, "loss": 0.6342, "step": 3986, "task_loss": 1.5280755758285522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5136503577232361, "epoch": 3.37, "learning_rate": 2.5926820432314942e-05, "loss": 0.623, "step": 3987, "task_loss": 0.7039515972137451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3499181270599365, "epoch": 3.37, "learning_rate": 2.5920782514189106e-05, "loss": 0.5471, "step": 3988, "task_loss": 0.26938170194625854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7411595582962036, "epoch": 3.37, "learning_rate": 2.591474459606328e-05, "loss": 0.6261, "step": 3989, "task_loss": 0.9704373478889465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6775782108306885, "epoch": 3.37, "learning_rate": 2.590870667793745e-05, "loss": 0.6907, "step": 3990, "task_loss": 1.0495789051055908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8890604972839355, "epoch": 3.37, "learning_rate": 2.5902668759811614e-05, "loss": 0.7579, "step": 3991, "task_loss": 0.6684366464614868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4833575487136841, "epoch": 3.37, "learning_rate": 2.5896630841685788e-05, "loss": 0.5072, "step": 3992, "task_loss": 0.5276519060134888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8928574919700623, "epoch": 3.38, "learning_rate": 2.589059292355996e-05, "loss": 0.7992, "step": 3993, "task_loss": 0.5278645753860474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1955161690711975, "epoch": 3.38, "learning_rate": 2.588455500543413e-05, "loss": 0.5209, "step": 3994, "task_loss": 0.2663785219192505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5638626217842102, "epoch": 3.38, "learning_rate": 2.5878517087308297e-05, "loss": 0.5906, "step": 3995, "task_loss": 0.5938208699226379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5434603095054626, "epoch": 3.38, "learning_rate": 2.5872479169182467e-05, "loss": 0.5963, "step": 3996, "task_loss": 0.47569167613983154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8213906288146973, "epoch": 3.38, "learning_rate": 2.5866441251056638e-05, "loss": 0.7028, "step": 3997, "task_loss": 1.6206679344177246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3728753924369812, "epoch": 3.38, "learning_rate": 2.5860403332930805e-05, "loss": 0.5721, "step": 3998, "task_loss": 0.9220856428146362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9597940444946289, "epoch": 3.38, "learning_rate": 2.5854365414804975e-05, "loss": 0.6385, "step": 3999, "task_loss": 0.7873920202255249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38649553060531616, "epoch": 3.38, "learning_rate": 2.584832749667915e-05, "loss": 0.52, "step": 4000, "task_loss": 0.7521729469299316 }, { "epoch": 3.38, "eval_accuracy": 0.8973861386138614, "eval_loss": 0.39273032546043396, "eval_runtime": 227.3471, "eval_samples_per_second": 111.064, "eval_steps_per_second": 0.871, "step": 4000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5619375109672546, "epoch": 3.38, "learning_rate": 2.5842289578553313e-05, "loss": 0.5681, "step": 4001, "task_loss": 0.8012285232543945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8093260526657104, "epoch": 3.38, "learning_rate": 2.5836251660427484e-05, "loss": 0.6435, "step": 4002, "task_loss": 0.6069123148918152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.964133083820343, "epoch": 3.38, "learning_rate": 2.5830213742301658e-05, "loss": 0.6817, "step": 4003, "task_loss": 1.0303255319595337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4471619427204132, "epoch": 3.38, "learning_rate": 2.582417582417583e-05, "loss": 0.4236, "step": 4004, "task_loss": 0.6342084407806396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8036463260650635, "epoch": 3.39, "learning_rate": 2.5818137906049996e-05, "loss": 0.6716, "step": 4005, "task_loss": 0.7874033451080322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5635591745376587, "epoch": 3.39, "learning_rate": 2.5812099987924166e-05, "loss": 0.4473, "step": 4006, "task_loss": 0.7093924283981323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.556754231452942, "epoch": 3.39, "learning_rate": 2.5806062069798337e-05, "loss": 0.7823, "step": 4007, "task_loss": 0.6224846839904785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6854926347732544, "epoch": 3.39, "learning_rate": 2.5800024151672504e-05, "loss": 0.7555, "step": 4008, "task_loss": 0.69862961769104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1127930879592896, "epoch": 3.39, "learning_rate": 2.5793986233546674e-05, "loss": 0.7784, "step": 4009, "task_loss": 0.9319111108779907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44248420000076294, "epoch": 3.39, "learning_rate": 2.5787948315420845e-05, "loss": 0.5076, "step": 4010, "task_loss": 0.4517652988433838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6492220759391785, "epoch": 3.39, "learning_rate": 2.5781910397295012e-05, "loss": 0.6031, "step": 4011, "task_loss": 0.556274950504303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40293532609939575, "epoch": 3.39, "learning_rate": 2.5775872479169183e-05, "loss": 0.4453, "step": 4012, "task_loss": 0.9317710399627686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6116511225700378, "epoch": 3.39, "learning_rate": 2.5769834561043353e-05, "loss": 0.6725, "step": 4013, "task_loss": 2.324150800704956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.479971319437027, "epoch": 3.39, "learning_rate": 2.5763796642917527e-05, "loss": 0.6237, "step": 4014, "task_loss": 0.7184808254241943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5763856172561646, "epoch": 3.39, "learning_rate": 2.575775872479169e-05, "loss": 0.6132, "step": 4015, "task_loss": 0.7311533093452454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6479064226150513, "epoch": 3.39, "learning_rate": 2.575172080666586e-05, "loss": 0.5534, "step": 4016, "task_loss": 0.7794193625450134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4574090540409088, "epoch": 3.4, "learning_rate": 2.5745682888540036e-05, "loss": 0.6907, "step": 4017, "task_loss": 0.17865455150604248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6432714462280273, "epoch": 3.4, "learning_rate": 2.57396449704142e-05, "loss": 0.6805, "step": 4018, "task_loss": 0.649492084980011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.685693621635437, "epoch": 3.4, "learning_rate": 2.5733607052288373e-05, "loss": 0.7439, "step": 4019, "task_loss": 1.2685801982879639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6344225406646729, "epoch": 3.4, "learning_rate": 2.5727569134162544e-05, "loss": 0.5662, "step": 4020, "task_loss": 0.2664928734302521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6037537455558777, "epoch": 3.4, "learning_rate": 2.572153121603671e-05, "loss": 0.6707, "step": 4021, "task_loss": 0.5969308614730835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7958155870437622, "epoch": 3.4, "learning_rate": 2.5715493297910882e-05, "loss": 0.8082, "step": 4022, "task_loss": 0.5687426328659058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45709228515625, "epoch": 3.4, "learning_rate": 2.5709455379785052e-05, "loss": 0.5067, "step": 4023, "task_loss": 0.1992705762386322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9010422825813293, "epoch": 3.4, "learning_rate": 2.5703417461659223e-05, "loss": 0.755, "step": 4024, "task_loss": 0.7553755640983582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3833635449409485, "epoch": 3.4, "learning_rate": 2.569737954353339e-05, "loss": 0.7304, "step": 4025, "task_loss": 0.6125249862670898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5408085584640503, "epoch": 3.4, "learning_rate": 2.569134162540756e-05, "loss": 0.6574, "step": 4026, "task_loss": 0.7149351239204407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8328060507774353, "epoch": 3.4, "learning_rate": 2.568530370728173e-05, "loss": 0.8334, "step": 4027, "task_loss": 1.4344416856765747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4606068730354309, "epoch": 3.4, "learning_rate": 2.56792657891559e-05, "loss": 0.6398, "step": 4028, "task_loss": 1.458067774772644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6227245330810547, "epoch": 3.41, "learning_rate": 2.567322787103007e-05, "loss": 0.5176, "step": 4029, "task_loss": 0.8357695937156677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43120628595352173, "epoch": 3.41, "learning_rate": 2.5667189952904243e-05, "loss": 0.6126, "step": 4030, "task_loss": 0.48504894971847534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0483330488204956, "epoch": 3.41, "learning_rate": 2.5661152034778407e-05, "loss": 0.792, "step": 4031, "task_loss": 0.6739441156387329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8086826801300049, "epoch": 3.41, "learning_rate": 2.5655114116652577e-05, "loss": 0.7237, "step": 4032, "task_loss": 0.9790715575218201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6444809436798096, "epoch": 3.41, "learning_rate": 2.564907619852675e-05, "loss": 0.6289, "step": 4033, "task_loss": 0.388263076543808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42495161294937134, "epoch": 3.41, "learning_rate": 2.5643038280400922e-05, "loss": 0.6366, "step": 4034, "task_loss": 1.5956201553344727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8781970739364624, "epoch": 3.41, "learning_rate": 2.563700036227509e-05, "loss": 0.6764, "step": 4035, "task_loss": 1.2105929851531982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8940562605857849, "epoch": 3.41, "learning_rate": 2.563096244414926e-05, "loss": 0.6472, "step": 4036, "task_loss": 1.3130667209625244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7000995874404907, "epoch": 3.41, "learning_rate": 2.562492452602343e-05, "loss": 0.6387, "step": 4037, "task_loss": 0.516766369342804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0856273174285889, "epoch": 3.41, "learning_rate": 2.5618886607897597e-05, "loss": 0.7653, "step": 4038, "task_loss": 1.5353285074234009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45068103075027466, "epoch": 3.41, "learning_rate": 2.5612848689771768e-05, "loss": 0.6586, "step": 4039, "task_loss": 1.0589542388916016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3232635259628296, "epoch": 3.41, "learning_rate": 2.560681077164594e-05, "loss": 0.4381, "step": 4040, "task_loss": 0.5172564387321472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9166985750198364, "epoch": 3.42, "learning_rate": 2.5600772853520106e-05, "loss": 0.6915, "step": 4041, "task_loss": 1.1762224435806274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35645991563796997, "epoch": 3.42, "learning_rate": 2.5594734935394276e-05, "loss": 0.6033, "step": 4042, "task_loss": 0.3474424481391907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3267214894294739, "epoch": 3.42, "learning_rate": 2.5588697017268447e-05, "loss": 0.6968, "step": 4043, "task_loss": 0.8212542533874512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.777168869972229, "epoch": 3.42, "learning_rate": 2.558265909914262e-05, "loss": 0.4834, "step": 4044, "task_loss": 0.22739778459072113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7606520652770996, "epoch": 3.42, "learning_rate": 2.5576621181016785e-05, "loss": 0.5895, "step": 4045, "task_loss": 0.24477140605449677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6149568557739258, "epoch": 3.42, "learning_rate": 2.557058326289096e-05, "loss": 0.5491, "step": 4046, "task_loss": 0.9849258065223694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3711242079734802, "epoch": 3.42, "learning_rate": 2.556454534476513e-05, "loss": 0.5893, "step": 4047, "task_loss": 0.6251998543739319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3993990421295166, "epoch": 3.42, "learning_rate": 2.5558507426639293e-05, "loss": 0.536, "step": 4048, "task_loss": 0.4073074460029602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5568403601646423, "epoch": 3.42, "learning_rate": 2.5552469508513467e-05, "loss": 0.6903, "step": 4049, "task_loss": 0.34182626008987427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45057666301727295, "epoch": 3.42, "learning_rate": 2.5546431590387637e-05, "loss": 0.7054, "step": 4050, "task_loss": 0.7158098220825195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.627739429473877, "epoch": 3.42, "learning_rate": 2.5540393672261805e-05, "loss": 0.6728, "step": 4051, "task_loss": 1.679520845413208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44601646065711975, "epoch": 3.42, "learning_rate": 2.5534355754135975e-05, "loss": 0.8032, "step": 4052, "task_loss": 0.4958054721355438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6786448359489441, "epoch": 3.43, "learning_rate": 2.5528317836010146e-05, "loss": 0.5551, "step": 4053, "task_loss": 0.49955958127975464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2151453495025635, "epoch": 3.43, "learning_rate": 2.5522279917884313e-05, "loss": 0.7328, "step": 4054, "task_loss": 0.924723744392395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3481141924858093, "epoch": 3.43, "learning_rate": 2.5516241999758483e-05, "loss": 0.5074, "step": 4055, "task_loss": 0.6574508547782898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41558998823165894, "epoch": 3.43, "learning_rate": 2.5510204081632654e-05, "loss": 0.656, "step": 4056, "task_loss": 0.8131816983222961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4063583016395569, "epoch": 3.43, "learning_rate": 2.5504166163506828e-05, "loss": 0.467, "step": 4057, "task_loss": 0.5216743350028992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7392359972000122, "epoch": 3.43, "learning_rate": 2.5498128245380992e-05, "loss": 0.5464, "step": 4058, "task_loss": 0.5014329552650452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3512876331806183, "epoch": 3.43, "learning_rate": 2.5492090327255162e-05, "loss": 0.4719, "step": 4059, "task_loss": 0.825005829334259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48547133803367615, "epoch": 3.43, "learning_rate": 2.5486052409129336e-05, "loss": 0.5147, "step": 4060, "task_loss": 0.4620617926120758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3409923017024994, "epoch": 3.43, "learning_rate": 2.54800144910035e-05, "loss": 0.5508, "step": 4061, "task_loss": 0.12199026346206665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4654749929904938, "epoch": 3.43, "learning_rate": 2.5473976572877674e-05, "loss": 0.6074, "step": 4062, "task_loss": 0.5657976269721985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2463016510009766, "epoch": 3.43, "learning_rate": 2.5467938654751845e-05, "loss": 0.7483, "step": 4063, "task_loss": 0.7868435978889465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4532428979873657, "epoch": 3.44, "learning_rate": 2.546190073662601e-05, "loss": 0.6948, "step": 4064, "task_loss": 0.5779270529747009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6377803087234497, "epoch": 3.44, "learning_rate": 2.5455862818500182e-05, "loss": 0.5841, "step": 4065, "task_loss": 0.5476099252700806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6959567666053772, "epoch": 3.44, "learning_rate": 2.5449824900374353e-05, "loss": 0.6357, "step": 4066, "task_loss": 1.1215462684631348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8884621262550354, "epoch": 3.44, "learning_rate": 2.5443786982248524e-05, "loss": 0.6233, "step": 4067, "task_loss": 1.4239797592163086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3187132477760315, "epoch": 3.44, "learning_rate": 2.543774906412269e-05, "loss": 0.7261, "step": 4068, "task_loss": 0.48107361793518066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.587197482585907, "epoch": 3.44, "learning_rate": 2.543171114599686e-05, "loss": 0.6108, "step": 4069, "task_loss": 0.16340531408786774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38556361198425293, "epoch": 3.44, "learning_rate": 2.5425673227871032e-05, "loss": 0.5954, "step": 4070, "task_loss": 0.8683832883834839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7441391944885254, "epoch": 3.44, "learning_rate": 2.54196353097452e-05, "loss": 0.6779, "step": 4071, "task_loss": 1.5125949382781982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5571364164352417, "epoch": 3.44, "learning_rate": 2.541359739161937e-05, "loss": 0.5353, "step": 4072, "task_loss": 1.4887259006500244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5537574291229248, "epoch": 3.44, "learning_rate": 2.540755947349354e-05, "loss": 0.528, "step": 4073, "task_loss": 0.7559567093849182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4392712414264679, "epoch": 3.44, "learning_rate": 2.5401521555367707e-05, "loss": 0.4894, "step": 4074, "task_loss": 0.8013269305229187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7243004441261292, "epoch": 3.44, "learning_rate": 2.5395483637241878e-05, "loss": 0.6391, "step": 4075, "task_loss": 1.3908051252365112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32406771183013916, "epoch": 3.45, "learning_rate": 2.5389445719116052e-05, "loss": 0.3039, "step": 4076, "task_loss": 0.14536717534065247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5103085041046143, "epoch": 3.45, "learning_rate": 2.5383407800990222e-05, "loss": 0.4905, "step": 4077, "task_loss": 0.9136930704116821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37917008996009827, "epoch": 3.45, "learning_rate": 2.537736988286439e-05, "loss": 0.498, "step": 4078, "task_loss": 0.68227219581604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8792146444320679, "epoch": 3.45, "learning_rate": 2.537133196473856e-05, "loss": 0.6489, "step": 4079, "task_loss": 0.9473459720611572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.779862642288208, "epoch": 3.45, "learning_rate": 2.536529404661273e-05, "loss": 0.7844, "step": 4080, "task_loss": 0.37694743275642395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4561316967010498, "epoch": 3.45, "learning_rate": 2.5359256128486898e-05, "loss": 0.6186, "step": 4081, "task_loss": 1.2141536474227905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7657855749130249, "epoch": 3.45, "learning_rate": 2.535321821036107e-05, "loss": 0.8638, "step": 4082, "task_loss": 1.3844389915466309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24431322515010834, "epoch": 3.45, "learning_rate": 2.534718029223524e-05, "loss": 0.5785, "step": 4083, "task_loss": 0.25010982155799866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49407440423965454, "epoch": 3.45, "learning_rate": 2.5341142374109406e-05, "loss": 0.7149, "step": 4084, "task_loss": 0.5499097108840942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5480067729949951, "epoch": 3.45, "learning_rate": 2.5335104455983577e-05, "loss": 0.4441, "step": 4085, "task_loss": 0.8086363077163696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5183594822883606, "epoch": 3.45, "learning_rate": 2.5329066537857747e-05, "loss": 0.6751, "step": 4086, "task_loss": 0.7493042945861816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6638196110725403, "epoch": 3.45, "learning_rate": 2.532302861973192e-05, "loss": 0.5771, "step": 4087, "task_loss": 0.6907023787498474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8202665448188782, "epoch": 3.46, "learning_rate": 2.5316990701606085e-05, "loss": 0.8535, "step": 4088, "task_loss": 0.6950072646141052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5491451025009155, "epoch": 3.46, "learning_rate": 2.5310952783480256e-05, "loss": 0.4876, "step": 4089, "task_loss": 0.7841203212738037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5857775211334229, "epoch": 3.46, "learning_rate": 2.530491486535443e-05, "loss": 0.6818, "step": 4090, "task_loss": 1.331052541732788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6425918340682983, "epoch": 3.46, "learning_rate": 2.5298876947228594e-05, "loss": 0.6916, "step": 4091, "task_loss": 0.4713674783706665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.549384593963623, "epoch": 3.46, "learning_rate": 2.5292839029102767e-05, "loss": 0.7336, "step": 4092, "task_loss": 0.6088681221008301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39014825224876404, "epoch": 3.46, "learning_rate": 2.5286801110976938e-05, "loss": 0.5453, "step": 4093, "task_loss": 0.31069236993789673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2622162401676178, "epoch": 3.46, "learning_rate": 2.5280763192851102e-05, "loss": 0.558, "step": 4094, "task_loss": 0.29418930411338806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6795892715454102, "epoch": 3.46, "learning_rate": 2.5274725274725276e-05, "loss": 0.6031, "step": 4095, "task_loss": 1.8265482187271118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23804152011871338, "epoch": 3.46, "learning_rate": 2.5268687356599446e-05, "loss": 0.5664, "step": 4096, "task_loss": 0.10905683040618896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36927133798599243, "epoch": 3.46, "learning_rate": 2.5262649438473617e-05, "loss": 0.5172, "step": 4097, "task_loss": 0.29389524459838867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3518790006637573, "epoch": 3.46, "learning_rate": 2.5256611520347784e-05, "loss": 0.4797, "step": 4098, "task_loss": 0.4755506217479706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9117350578308105, "epoch": 3.46, "learning_rate": 2.5250573602221955e-05, "loss": 0.7022, "step": 4099, "task_loss": 0.7956864833831787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4939345419406891, "epoch": 3.47, "learning_rate": 2.5244535684096125e-05, "loss": 0.5168, "step": 4100, "task_loss": 0.23132286965847015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7668377161026001, "epoch": 3.47, "learning_rate": 2.5238497765970292e-05, "loss": 0.7647, "step": 4101, "task_loss": 0.3080413341522217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6111080646514893, "epoch": 3.47, "learning_rate": 2.5232459847844463e-05, "loss": 0.4855, "step": 4102, "task_loss": 0.6244467496871948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4321189820766449, "epoch": 3.47, "learning_rate": 2.5226421929718637e-05, "loss": 0.6329, "step": 4103, "task_loss": 0.9587082862854004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5987145900726318, "epoch": 3.47, "learning_rate": 2.52203840115928e-05, "loss": 0.6725, "step": 4104, "task_loss": 0.7074791789054871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6524959802627563, "epoch": 3.47, "learning_rate": 2.521434609346697e-05, "loss": 0.6426, "step": 4105, "task_loss": 0.8801628351211548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6409686207771301, "epoch": 3.47, "learning_rate": 2.5208308175341145e-05, "loss": 0.5967, "step": 4106, "task_loss": 0.3904375731945038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40125250816345215, "epoch": 3.47, "learning_rate": 2.5202270257215316e-05, "loss": 0.5681, "step": 4107, "task_loss": 0.8492122292518616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9124498963356018, "epoch": 3.47, "learning_rate": 2.5196232339089483e-05, "loss": 0.6647, "step": 4108, "task_loss": 0.7564025521278381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8590701818466187, "epoch": 3.47, "learning_rate": 2.5190194420963654e-05, "loss": 0.5719, "step": 4109, "task_loss": 0.5583292245864868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5360000133514404, "epoch": 3.47, "learning_rate": 2.5184156502837824e-05, "loss": 0.5825, "step": 4110, "task_loss": 0.5857567191123962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.757627010345459, "epoch": 3.47, "learning_rate": 2.517811858471199e-05, "loss": 0.6933, "step": 4111, "task_loss": 0.3940114378929138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7134948968887329, "epoch": 3.48, "learning_rate": 2.5172080666586162e-05, "loss": 0.6093, "step": 4112, "task_loss": 0.429108589887619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44072428345680237, "epoch": 3.48, "learning_rate": 2.5166042748460333e-05, "loss": 0.533, "step": 4113, "task_loss": 1.0380498170852661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.573554277420044, "epoch": 3.48, "learning_rate": 2.51600048303345e-05, "loss": 0.5996, "step": 4114, "task_loss": 0.3197292983531952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5454357862472534, "epoch": 3.48, "learning_rate": 2.515396691220867e-05, "loss": 0.625, "step": 4115, "task_loss": 0.222787007689476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1879358291625977, "epoch": 3.48, "learning_rate": 2.514792899408284e-05, "loss": 0.6776, "step": 4116, "task_loss": 1.277449369430542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4026101231575012, "epoch": 3.48, "learning_rate": 2.5141891075957015e-05, "loss": 0.5022, "step": 4117, "task_loss": 0.4813377261161804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6002867221832275, "epoch": 3.48, "learning_rate": 2.513585315783118e-05, "loss": 0.7186, "step": 4118, "task_loss": 0.7160503268241882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48108774423599243, "epoch": 3.48, "learning_rate": 2.5129815239705353e-05, "loss": 0.4579, "step": 4119, "task_loss": 1.2436598539352417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2867410182952881, "epoch": 3.48, "learning_rate": 2.5123777321579523e-05, "loss": 0.5922, "step": 4120, "task_loss": 0.8729248046875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4832357168197632, "epoch": 3.48, "learning_rate": 2.5117739403453687e-05, "loss": 0.9395, "step": 4121, "task_loss": 0.7131137251853943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7502032518386841, "epoch": 3.48, "learning_rate": 2.511170148532786e-05, "loss": 0.6439, "step": 4122, "task_loss": 0.6653632521629333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4150236248970032, "epoch": 3.48, "learning_rate": 2.510566356720203e-05, "loss": 0.5345, "step": 4123, "task_loss": 0.35604193806648254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6976333260536194, "epoch": 3.49, "learning_rate": 2.50996256490762e-05, "loss": 0.5961, "step": 4124, "task_loss": 0.5215651988983154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8826763033866882, "epoch": 3.49, "learning_rate": 2.509358773095037e-05, "loss": 0.657, "step": 4125, "task_loss": 1.1694620847702026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6594715118408203, "epoch": 3.49, "learning_rate": 2.508754981282454e-05, "loss": 0.5551, "step": 4126, "task_loss": 1.1890171766281128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38642674684524536, "epoch": 3.49, "learning_rate": 2.508151189469871e-05, "loss": 0.5521, "step": 4127, "task_loss": 0.1710108518600464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5006185173988342, "epoch": 3.49, "learning_rate": 2.5075473976572878e-05, "loss": 0.5986, "step": 4128, "task_loss": 0.2357504516839981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1956998109817505, "epoch": 3.49, "learning_rate": 2.5069436058447048e-05, "loss": 0.7964, "step": 4129, "task_loss": 0.7756554484367371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6721758246421814, "epoch": 3.49, "learning_rate": 2.506339814032122e-05, "loss": 0.6348, "step": 4130, "task_loss": 0.3779347836971283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7650978565216064, "epoch": 3.49, "learning_rate": 2.5057360222195386e-05, "loss": 0.5759, "step": 4131, "task_loss": 1.070095419883728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46239903569221497, "epoch": 3.49, "learning_rate": 2.5051322304069556e-05, "loss": 0.4727, "step": 4132, "task_loss": 0.3767215311527252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.588857889175415, "epoch": 3.49, "learning_rate": 2.504528438594373e-05, "loss": 0.6174, "step": 4133, "task_loss": 0.8602174520492554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7712404727935791, "epoch": 3.49, "learning_rate": 2.5039246467817894e-05, "loss": 0.6819, "step": 4134, "task_loss": 0.4068267047405243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48693594336509705, "epoch": 3.5, "learning_rate": 2.5033208549692068e-05, "loss": 0.7122, "step": 4135, "task_loss": 0.4525044858455658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4844130873680115, "epoch": 3.5, "learning_rate": 2.502717063156624e-05, "loss": 0.4968, "step": 4136, "task_loss": 0.8312535285949707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6508995294570923, "epoch": 3.5, "learning_rate": 2.502113271344041e-05, "loss": 0.6318, "step": 4137, "task_loss": 0.5318276882171631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3770734369754791, "epoch": 3.5, "learning_rate": 2.5015094795314576e-05, "loss": 0.7708, "step": 4138, "task_loss": 0.24721471965312958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8009540438652039, "epoch": 3.5, "learning_rate": 2.5009056877188747e-05, "loss": 0.6698, "step": 4139, "task_loss": 0.6334355473518372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4211912155151367, "epoch": 3.5, "learning_rate": 2.5003018959062918e-05, "loss": 0.7383, "step": 4140, "task_loss": 0.3429170548915863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2077077627182007, "epoch": 3.5, "learning_rate": 2.4996981040937088e-05, "loss": 0.6292, "step": 4141, "task_loss": 0.6622662544250488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.071517825126648, "epoch": 3.5, "learning_rate": 2.4990943122811255e-05, "loss": 0.7037, "step": 4142, "task_loss": 1.365580677986145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6356531381607056, "epoch": 3.5, "learning_rate": 2.4984905204685426e-05, "loss": 0.6026, "step": 4143, "task_loss": 0.2705966532230377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6606204509735107, "epoch": 3.5, "learning_rate": 2.4978867286559597e-05, "loss": 0.5695, "step": 4144, "task_loss": 0.6722994446754456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6121552586555481, "epoch": 3.5, "learning_rate": 2.4972829368433764e-05, "loss": 0.5181, "step": 4145, "task_loss": 0.7211894392967224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7234222292900085, "epoch": 3.5, "learning_rate": 2.4966791450307934e-05, "loss": 0.558, "step": 4146, "task_loss": 0.9108874201774597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6865897178649902, "epoch": 3.51, "learning_rate": 2.4960753532182105e-05, "loss": 0.7021, "step": 4147, "task_loss": 0.9909319877624512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6469772458076477, "epoch": 3.51, "learning_rate": 2.4954715614056272e-05, "loss": 0.5438, "step": 4148, "task_loss": 0.45413684844970703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6964613199234009, "epoch": 3.51, "learning_rate": 2.4948677695930446e-05, "loss": 0.4652, "step": 4149, "task_loss": 0.23486928641796112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7621926069259644, "epoch": 3.51, "learning_rate": 2.4942639777804613e-05, "loss": 0.6954, "step": 4150, "task_loss": 1.6399043798446655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3576836585998535, "epoch": 3.51, "learning_rate": 2.4936601859678784e-05, "loss": 0.7718, "step": 4151, "task_loss": 0.20914240181446075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9007201194763184, "epoch": 3.51, "learning_rate": 2.4930563941552954e-05, "loss": 0.7603, "step": 4152, "task_loss": 0.37844640016555786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42565685510635376, "epoch": 3.51, "learning_rate": 2.492452602342712e-05, "loss": 0.584, "step": 4153, "task_loss": 1.0903626680374146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.780028760433197, "epoch": 3.51, "learning_rate": 2.4918488105301292e-05, "loss": 0.5745, "step": 4154, "task_loss": 0.5820525884628296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6406984329223633, "epoch": 3.51, "learning_rate": 2.4912450187175463e-05, "loss": 0.55, "step": 4155, "task_loss": 0.2671067416667938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47206252813339233, "epoch": 3.51, "learning_rate": 2.4906412269049633e-05, "loss": 0.5297, "step": 4156, "task_loss": 0.6230443120002747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6872288584709167, "epoch": 3.51, "learning_rate": 2.4900374350923804e-05, "loss": 0.5656, "step": 4157, "task_loss": 0.49943751096725464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5443592071533203, "epoch": 3.51, "learning_rate": 2.489433643279797e-05, "loss": 0.5494, "step": 4158, "task_loss": 1.2601468563079834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9456206560134888, "epoch": 3.52, "learning_rate": 2.488829851467214e-05, "loss": 0.6375, "step": 4159, "task_loss": 0.7289506196975708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6927809119224548, "epoch": 3.52, "learning_rate": 2.4882260596546312e-05, "loss": 0.6227, "step": 4160, "task_loss": 0.8540127277374268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6867104768753052, "epoch": 3.52, "learning_rate": 2.4876222678420483e-05, "loss": 0.7358, "step": 4161, "task_loss": 1.230502963066101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6967648863792419, "epoch": 3.52, "learning_rate": 2.487018476029465e-05, "loss": 0.5537, "step": 4162, "task_loss": 1.09322190284729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5458663702011108, "epoch": 3.52, "learning_rate": 2.486414684216882e-05, "loss": 0.5668, "step": 4163, "task_loss": 0.8900253176689148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0240482091903687, "epoch": 3.52, "learning_rate": 2.485810892404299e-05, "loss": 0.9407, "step": 4164, "task_loss": 0.8457967042922974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6908811926841736, "epoch": 3.52, "learning_rate": 2.485207100591716e-05, "loss": 0.491, "step": 4165, "task_loss": 1.1205745935440063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48710277676582336, "epoch": 3.52, "learning_rate": 2.4846033087791332e-05, "loss": 0.5711, "step": 4166, "task_loss": 0.5901609063148499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.693988025188446, "epoch": 3.52, "learning_rate": 2.48399951696655e-05, "loss": 0.6861, "step": 4167, "task_loss": 1.334979772567749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34161728620529175, "epoch": 3.52, "learning_rate": 2.483395725153967e-05, "loss": 0.6088, "step": 4168, "task_loss": 0.22691932320594788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3767860233783722, "epoch": 3.52, "learning_rate": 2.482791933341384e-05, "loss": 0.3688, "step": 4169, "task_loss": 0.15169915556907654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7047252058982849, "epoch": 3.52, "learning_rate": 2.4821881415288008e-05, "loss": 0.5086, "step": 4170, "task_loss": 0.9495813846588135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37209832668304443, "epoch": 3.53, "learning_rate": 2.481584349716218e-05, "loss": 0.5325, "step": 4171, "task_loss": 0.23220722377300262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9835602045059204, "epoch": 3.53, "learning_rate": 2.480980557903635e-05, "loss": 0.6086, "step": 4172, "task_loss": 1.6538671255111694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3925881087779999, "epoch": 3.53, "learning_rate": 2.480376766091052e-05, "loss": 0.8047, "step": 4173, "task_loss": 1.5235484838485718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7159826755523682, "epoch": 3.53, "learning_rate": 2.479772974278469e-05, "loss": 0.6364, "step": 4174, "task_loss": 1.7120332717895508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4556894600391388, "epoch": 3.53, "learning_rate": 2.4791691824658857e-05, "loss": 0.6396, "step": 4175, "task_loss": 0.4975559115409851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5380347371101379, "epoch": 3.53, "learning_rate": 2.478565390653303e-05, "loss": 0.6157, "step": 4176, "task_loss": 0.6434682011604309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42392241954803467, "epoch": 3.53, "learning_rate": 2.4779615988407198e-05, "loss": 0.6646, "step": 4177, "task_loss": 0.8844846487045288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24888218939304352, "epoch": 3.53, "learning_rate": 2.4773578070281365e-05, "loss": 0.4635, "step": 4178, "task_loss": 0.40036919713020325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6721847057342529, "epoch": 3.53, "learning_rate": 2.476754015215554e-05, "loss": 0.6057, "step": 4179, "task_loss": 1.0908523797988892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7315338253974915, "epoch": 3.53, "learning_rate": 2.4761502234029707e-05, "loss": 0.5388, "step": 4180, "task_loss": 0.617293119430542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9069807529449463, "epoch": 3.53, "learning_rate": 2.4755464315903877e-05, "loss": 0.6865, "step": 4181, "task_loss": 1.3291676044464111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3983303904533386, "epoch": 3.53, "learning_rate": 2.4749426397778048e-05, "loss": 0.5438, "step": 4182, "task_loss": 1.0584784746170044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5257368087768555, "epoch": 3.54, "learning_rate": 2.4743388479652215e-05, "loss": 0.6955, "step": 4183, "task_loss": 0.10372164845466614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43408524990081787, "epoch": 3.54, "learning_rate": 2.473735056152639e-05, "loss": 0.5451, "step": 4184, "task_loss": 0.48534634709358215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5947525501251221, "epoch": 3.54, "learning_rate": 2.4731312643400556e-05, "loss": 0.5278, "step": 4185, "task_loss": 1.2702536582946777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.617225170135498, "epoch": 3.54, "learning_rate": 2.4725274725274727e-05, "loss": 0.5633, "step": 4186, "task_loss": 0.623254656791687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0065912008285522, "epoch": 3.54, "learning_rate": 2.4719236807148897e-05, "loss": 0.6114, "step": 4187, "task_loss": 0.6516858339309692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3438969850540161, "epoch": 3.54, "learning_rate": 2.4713198889023064e-05, "loss": 0.5736, "step": 4188, "task_loss": 0.8995336890220642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34393566846847534, "epoch": 3.54, "learning_rate": 2.4707160970897235e-05, "loss": 0.6471, "step": 4189, "task_loss": 1.2938134670257568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8052244186401367, "epoch": 3.54, "learning_rate": 2.4701123052771406e-05, "loss": 0.5769, "step": 4190, "task_loss": 0.7245664596557617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.513466477394104, "epoch": 3.54, "learning_rate": 2.4695085134645576e-05, "loss": 0.5096, "step": 4191, "task_loss": 0.25636932253837585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5502128005027771, "epoch": 3.54, "learning_rate": 2.4689047216519747e-05, "loss": 0.6867, "step": 4192, "task_loss": 1.2755722999572754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5753607153892517, "epoch": 3.54, "learning_rate": 2.4683009298393914e-05, "loss": 0.5693, "step": 4193, "task_loss": 1.0072088241577148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9075958132743835, "epoch": 3.54, "learning_rate": 2.4676971380268084e-05, "loss": 0.6451, "step": 4194, "task_loss": 0.2446889579296112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6129429936408997, "epoch": 3.55, "learning_rate": 2.4670933462142255e-05, "loss": 0.6777, "step": 4195, "task_loss": 1.3101462125778198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8324920535087585, "epoch": 3.55, "learning_rate": 2.4664895544016426e-05, "loss": 0.4754, "step": 4196, "task_loss": 0.821843147277832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5216440558433533, "epoch": 3.55, "learning_rate": 2.4658857625890593e-05, "loss": 0.4818, "step": 4197, "task_loss": 0.6869677305221558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3029025197029114, "epoch": 3.55, "learning_rate": 2.4652819707764763e-05, "loss": 0.6883, "step": 4198, "task_loss": 0.9774749875068665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7073577642440796, "epoch": 3.55, "learning_rate": 2.4646781789638934e-05, "loss": 0.608, "step": 4199, "task_loss": 1.3318475484848022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6072676181793213, "epoch": 3.55, "learning_rate": 2.4640743871513104e-05, "loss": 0.5624, "step": 4200, "task_loss": 0.6497167348861694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7979310750961304, "epoch": 3.55, "learning_rate": 2.4634705953387275e-05, "loss": 0.4511, "step": 4201, "task_loss": 0.6426718235015869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7035356760025024, "epoch": 3.55, "learning_rate": 2.4628668035261442e-05, "loss": 0.5594, "step": 4202, "task_loss": 0.5779056549072266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4713970422744751, "epoch": 3.55, "learning_rate": 2.4622630117135613e-05, "loss": 0.6405, "step": 4203, "task_loss": 0.3309163749217987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.3479150533676147, "epoch": 3.55, "learning_rate": 2.4616592199009783e-05, "loss": 0.8725, "step": 4204, "task_loss": 0.9867119193077087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44606366753578186, "epoch": 3.55, "learning_rate": 2.461055428088395e-05, "loss": 0.5515, "step": 4205, "task_loss": 0.6819097399711609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8505806922912598, "epoch": 3.56, "learning_rate": 2.4604516362758124e-05, "loss": 0.6598, "step": 4206, "task_loss": 0.9419381618499756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7018644213676453, "epoch": 3.56, "learning_rate": 2.459847844463229e-05, "loss": 0.7172, "step": 4207, "task_loss": 0.3468632400035858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7288522124290466, "epoch": 3.56, "learning_rate": 2.4592440526506462e-05, "loss": 0.5848, "step": 4208, "task_loss": 0.7241808772087097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5328664183616638, "epoch": 3.56, "learning_rate": 2.4586402608380633e-05, "loss": 0.5549, "step": 4209, "task_loss": 0.7984256148338318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3230014741420746, "epoch": 3.56, "learning_rate": 2.45803646902548e-05, "loss": 0.5678, "step": 4210, "task_loss": 0.19243711233139038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5487404465675354, "epoch": 3.56, "learning_rate": 2.457432677212897e-05, "loss": 0.6123, "step": 4211, "task_loss": 0.850402295589447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6502159237861633, "epoch": 3.56, "learning_rate": 2.456828885400314e-05, "loss": 0.5805, "step": 4212, "task_loss": 0.8755824565887451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5028156042098999, "epoch": 3.56, "learning_rate": 2.456225093587731e-05, "loss": 0.5127, "step": 4213, "task_loss": 0.5019248723983765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.399415522813797, "epoch": 3.56, "learning_rate": 2.4556213017751482e-05, "loss": 0.5287, "step": 4214, "task_loss": 0.9000704884529114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3839940130710602, "epoch": 3.56, "learning_rate": 2.455017509962565e-05, "loss": 0.7481, "step": 4215, "task_loss": 0.6470539569854736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48718535900115967, "epoch": 3.56, "learning_rate": 2.454413718149982e-05, "loss": 0.4779, "step": 4216, "task_loss": 0.6259360909461975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3314751386642456, "epoch": 3.56, "learning_rate": 2.453809926337399e-05, "loss": 0.4968, "step": 4217, "task_loss": 0.7570702433586121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4707067012786865, "epoch": 3.57, "learning_rate": 2.4532061345248158e-05, "loss": 0.642, "step": 4218, "task_loss": 0.5359216332435608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7616022825241089, "epoch": 3.57, "learning_rate": 2.452602342712233e-05, "loss": 0.622, "step": 4219, "task_loss": 1.0238831043243408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4238653779029846, "epoch": 3.57, "learning_rate": 2.45199855089965e-05, "loss": 0.7518, "step": 4220, "task_loss": 0.9566068053245544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23930177092552185, "epoch": 3.57, "learning_rate": 2.451394759087067e-05, "loss": 0.586, "step": 4221, "task_loss": 0.08897644281387329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49163293838500977, "epoch": 3.57, "learning_rate": 2.450790967274484e-05, "loss": 0.476, "step": 4222, "task_loss": 0.846335768699646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6995770931243896, "epoch": 3.57, "learning_rate": 2.4501871754619007e-05, "loss": 0.4632, "step": 4223, "task_loss": 1.1655348539352417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1134285926818848, "epoch": 3.57, "learning_rate": 2.4495833836493178e-05, "loss": 0.6538, "step": 4224, "task_loss": 1.3200987577438354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6507658958435059, "epoch": 3.57, "learning_rate": 2.448979591836735e-05, "loss": 0.7354, "step": 4225, "task_loss": 1.245566725730896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45306989550590515, "epoch": 3.57, "learning_rate": 2.448375800024152e-05, "loss": 0.5751, "step": 4226, "task_loss": 0.9382508397102356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2857702970504761, "epoch": 3.57, "learning_rate": 2.4477720082115686e-05, "loss": 0.5196, "step": 4227, "task_loss": 0.46433836221694946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5763422250747681, "epoch": 3.57, "learning_rate": 2.4471682163989857e-05, "loss": 0.5633, "step": 4228, "task_loss": 0.3643262982368469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6482219696044922, "epoch": 3.57, "learning_rate": 2.4465644245864027e-05, "loss": 0.8671, "step": 4229, "task_loss": 1.3384655714035034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6682378053665161, "epoch": 3.58, "learning_rate": 2.4459606327738198e-05, "loss": 0.5398, "step": 4230, "task_loss": 0.7895174622535706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4206162691116333, "epoch": 3.58, "learning_rate": 2.445356840961237e-05, "loss": 0.5475, "step": 4231, "task_loss": 0.5694159865379333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4416455626487732, "epoch": 3.58, "learning_rate": 2.4447530491486536e-05, "loss": 0.6186, "step": 4232, "task_loss": 0.8253732323646545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6457151770591736, "epoch": 3.58, "learning_rate": 2.4441492573360706e-05, "loss": 0.523, "step": 4233, "task_loss": 0.9314072728157043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35384833812713623, "epoch": 3.58, "learning_rate": 2.4435454655234877e-05, "loss": 0.4445, "step": 4234, "task_loss": 0.4616059958934784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8133848905563354, "epoch": 3.58, "learning_rate": 2.4429416737109044e-05, "loss": 0.548, "step": 4235, "task_loss": 0.6348656415939331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3496147096157074, "epoch": 3.58, "learning_rate": 2.4423378818983218e-05, "loss": 0.4827, "step": 4236, "task_loss": 0.0861474797129631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8831364512443542, "epoch": 3.58, "learning_rate": 2.4417340900857385e-05, "loss": 0.7353, "step": 4237, "task_loss": 1.387492299079895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3587501049041748, "epoch": 3.58, "learning_rate": 2.4411302982731556e-05, "loss": 0.6289, "step": 4238, "task_loss": 0.9814375638961792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5531660318374634, "epoch": 3.58, "learning_rate": 2.4405265064605726e-05, "loss": 0.5689, "step": 4239, "task_loss": 0.43527480959892273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7911735773086548, "epoch": 3.58, "learning_rate": 2.4399227146479893e-05, "loss": 0.6207, "step": 4240, "task_loss": 0.9894632697105408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49902456998825073, "epoch": 3.58, "learning_rate": 2.4393189228354067e-05, "loss": 0.65, "step": 4241, "task_loss": 0.7161417603492737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5667104721069336, "epoch": 3.59, "learning_rate": 2.4387151310228235e-05, "loss": 0.7315, "step": 4242, "task_loss": 1.3169701099395752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7981175184249878, "epoch": 3.59, "learning_rate": 2.4381113392102402e-05, "loss": 0.6645, "step": 4243, "task_loss": 0.4979659616947174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5441055297851562, "epoch": 3.59, "learning_rate": 2.4375075473976576e-05, "loss": 0.513, "step": 4244, "task_loss": 0.9403365254402161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31866455078125, "epoch": 3.59, "learning_rate": 2.4369037555850743e-05, "loss": 0.5917, "step": 4245, "task_loss": 0.17901165783405304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.683908998966217, "epoch": 3.59, "learning_rate": 2.4362999637724913e-05, "loss": 0.5988, "step": 4246, "task_loss": 0.3908775746822357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46619880199432373, "epoch": 3.59, "learning_rate": 2.4356961719599084e-05, "loss": 0.5163, "step": 4247, "task_loss": 0.901326060295105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4998355209827423, "epoch": 3.59, "learning_rate": 2.435092380147325e-05, "loss": 0.5721, "step": 4248, "task_loss": 0.29014265537261963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6405777931213379, "epoch": 3.59, "learning_rate": 2.4344885883347425e-05, "loss": 0.6047, "step": 4249, "task_loss": 1.243961215019226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4013212323188782, "epoch": 3.59, "learning_rate": 2.4338847965221592e-05, "loss": 0.4176, "step": 4250, "task_loss": 0.6892869472503662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7979017496109009, "epoch": 3.59, "learning_rate": 2.4332810047095763e-05, "loss": 0.8838, "step": 4251, "task_loss": 0.487253338098526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5379495620727539, "epoch": 3.59, "learning_rate": 2.4326772128969933e-05, "loss": 0.5005, "step": 4252, "task_loss": 0.7251278162002563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0675206184387207, "epoch": 3.59, "learning_rate": 2.43207342108441e-05, "loss": 0.7608, "step": 4253, "task_loss": 2.178403377532959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3482409119606018, "epoch": 3.6, "learning_rate": 2.431469629271827e-05, "loss": 0.6384, "step": 4254, "task_loss": 0.7409291863441467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.393201619386673, "epoch": 3.6, "learning_rate": 2.4308658374592442e-05, "loss": 0.5204, "step": 4255, "task_loss": 0.5000784397125244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8834972381591797, "epoch": 3.6, "learning_rate": 2.4302620456466612e-05, "loss": 0.6308, "step": 4256, "task_loss": 1.3259788751602173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0330417156219482, "epoch": 3.6, "learning_rate": 2.4296582538340783e-05, "loss": 0.6635, "step": 4257, "task_loss": 1.0197449922561646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23399166762828827, "epoch": 3.6, "learning_rate": 2.429054462021495e-05, "loss": 0.4484, "step": 4258, "task_loss": 0.23665161430835724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6208016872406006, "epoch": 3.6, "learning_rate": 2.428450670208912e-05, "loss": 0.669, "step": 4259, "task_loss": 0.9160673022270203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8655545711517334, "epoch": 3.6, "learning_rate": 2.427846878396329e-05, "loss": 0.6719, "step": 4260, "task_loss": 1.0783336162567139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37337255477905273, "epoch": 3.6, "learning_rate": 2.4272430865837462e-05, "loss": 0.4706, "step": 4261, "task_loss": 0.6342410445213318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48439961671829224, "epoch": 3.6, "learning_rate": 2.426639294771163e-05, "loss": 0.7941, "step": 4262, "task_loss": 0.46786925196647644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5604270100593567, "epoch": 3.6, "learning_rate": 2.42603550295858e-05, "loss": 0.6856, "step": 4263, "task_loss": 0.7858852744102478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26819878816604614, "epoch": 3.6, "learning_rate": 2.425431711145997e-05, "loss": 0.413, "step": 4264, "task_loss": 0.07641857117414474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27646902203559875, "epoch": 3.6, "learning_rate": 2.424827919333414e-05, "loss": 0.4224, "step": 4265, "task_loss": 0.34880775213241577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4637950360774994, "epoch": 3.61, "learning_rate": 2.424224127520831e-05, "loss": 0.4939, "step": 4266, "task_loss": 0.3946613073348999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6749200820922852, "epoch": 3.61, "learning_rate": 2.423620335708248e-05, "loss": 0.6107, "step": 4267, "task_loss": 1.0687825679779053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6110369563102722, "epoch": 3.61, "learning_rate": 2.423016543895665e-05, "loss": 0.6363, "step": 4268, "task_loss": 1.0181410312652588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36688047647476196, "epoch": 3.61, "learning_rate": 2.422412752083082e-05, "loss": 0.6073, "step": 4269, "task_loss": 0.29250049591064453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45256829261779785, "epoch": 3.61, "learning_rate": 2.4218089602704987e-05, "loss": 0.4911, "step": 4270, "task_loss": 0.19085165858268738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6192053556442261, "epoch": 3.61, "learning_rate": 2.421205168457916e-05, "loss": 0.6192, "step": 4271, "task_loss": 1.6141718626022339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5643174648284912, "epoch": 3.61, "learning_rate": 2.4206013766453328e-05, "loss": 0.7307, "step": 4272, "task_loss": 1.378890872001648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4509198069572449, "epoch": 3.61, "learning_rate": 2.41999758483275e-05, "loss": 0.5689, "step": 4273, "task_loss": 0.8678677678108215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36257609724998474, "epoch": 3.61, "learning_rate": 2.419393793020167e-05, "loss": 0.6665, "step": 4274, "task_loss": 0.8630626797676086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44464147090911865, "epoch": 3.61, "learning_rate": 2.4187900012075836e-05, "loss": 0.5083, "step": 4275, "task_loss": 0.750762939453125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9607479572296143, "epoch": 3.61, "learning_rate": 2.4181862093950007e-05, "loss": 0.6782, "step": 4276, "task_loss": 0.6660279035568237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6897210478782654, "epoch": 3.61, "learning_rate": 2.4175824175824177e-05, "loss": 0.5142, "step": 4277, "task_loss": 0.6546536684036255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5398198366165161, "epoch": 3.62, "learning_rate": 2.4169786257698345e-05, "loss": 0.4464, "step": 4278, "task_loss": 0.35818231105804443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48610928654670715, "epoch": 3.62, "learning_rate": 2.416374833957252e-05, "loss": 0.5843, "step": 4279, "task_loss": 0.5175728797912598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7794807553291321, "epoch": 3.62, "learning_rate": 2.4157710421446686e-05, "loss": 0.5415, "step": 4280, "task_loss": 0.6003097891807556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9873936176300049, "epoch": 3.62, "learning_rate": 2.4151672503320856e-05, "loss": 0.6864, "step": 4281, "task_loss": 1.0064197778701782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7737504839897156, "epoch": 3.62, "learning_rate": 2.4145634585195027e-05, "loss": 0.7493, "step": 4282, "task_loss": 0.8891194462776184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25292620062828064, "epoch": 3.62, "learning_rate": 2.4139596667069194e-05, "loss": 0.5422, "step": 4283, "task_loss": 0.37782716751098633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8298013210296631, "epoch": 3.62, "learning_rate": 2.4133558748943365e-05, "loss": 0.5606, "step": 4284, "task_loss": 1.0094630718231201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6931698322296143, "epoch": 3.62, "learning_rate": 2.4127520830817535e-05, "loss": 0.6745, "step": 4285, "task_loss": 0.5029622912406921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.884263277053833, "epoch": 3.62, "learning_rate": 2.4121482912691706e-05, "loss": 0.5401, "step": 4286, "task_loss": 0.5203794240951538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3827900290489197, "epoch": 3.62, "learning_rate": 2.4115444994565876e-05, "loss": 0.491, "step": 4287, "task_loss": 0.26666778326034546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26403582096099854, "epoch": 3.62, "learning_rate": 2.4109407076440044e-05, "loss": 0.4339, "step": 4288, "task_loss": 0.7801306247711182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6067962050437927, "epoch": 3.63, "learning_rate": 2.4103369158314214e-05, "loss": 0.6043, "step": 4289, "task_loss": 0.8797191381454468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5694129467010498, "epoch": 3.63, "learning_rate": 2.4097331240188385e-05, "loss": 0.49, "step": 4290, "task_loss": 0.9478297829627991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.300503671169281, "epoch": 3.63, "learning_rate": 2.4091293322062555e-05, "loss": 0.8414, "step": 4291, "task_loss": 0.35614240169525146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8378791809082031, "epoch": 3.63, "learning_rate": 2.4085255403936722e-05, "loss": 0.5339, "step": 4292, "task_loss": 0.8773441314697266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6370061635971069, "epoch": 3.63, "learning_rate": 2.4079217485810893e-05, "loss": 0.6115, "step": 4293, "task_loss": 1.521229863166809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7086033821105957, "epoch": 3.63, "learning_rate": 2.4073179567685064e-05, "loss": 0.602, "step": 4294, "task_loss": 0.9678382873535156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43517589569091797, "epoch": 3.63, "learning_rate": 2.4067141649559234e-05, "loss": 0.5901, "step": 4295, "task_loss": 0.6506100296974182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4611397981643677, "epoch": 3.63, "learning_rate": 2.4061103731433405e-05, "loss": 0.4927, "step": 4296, "task_loss": 0.5989790558815002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3266776502132416, "epoch": 3.63, "learning_rate": 2.4055065813307572e-05, "loss": 0.4543, "step": 4297, "task_loss": 0.46925950050354004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5356259346008301, "epoch": 3.63, "learning_rate": 2.4049027895181742e-05, "loss": 0.6457, "step": 4298, "task_loss": 1.395267128944397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39364925026893616, "epoch": 3.63, "learning_rate": 2.4042989977055913e-05, "loss": 0.4223, "step": 4299, "task_loss": 0.500012993812561 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46568095684051514, "epoch": 3.63, "learning_rate": 2.403695205893008e-05, "loss": 0.6649, "step": 4300, "task_loss": 1.2760881185531616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5615409016609192, "epoch": 3.64, "learning_rate": 2.4030914140804254e-05, "loss": 0.5653, "step": 4301, "task_loss": 0.6852695941925049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5643090009689331, "epoch": 3.64, "learning_rate": 2.402487622267842e-05, "loss": 0.6068, "step": 4302, "task_loss": 0.6581242084503174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5467841625213623, "epoch": 3.64, "learning_rate": 2.4018838304552592e-05, "loss": 0.5659, "step": 4303, "task_loss": 1.1785695552825928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5230387449264526, "epoch": 3.64, "learning_rate": 2.4012800386426763e-05, "loss": 0.6225, "step": 4304, "task_loss": 0.6625786423683167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7795270681381226, "epoch": 3.64, "learning_rate": 2.400676246830093e-05, "loss": 0.7036, "step": 4305, "task_loss": 1.3561177253723145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5388163328170776, "epoch": 3.64, "learning_rate": 2.4000724550175104e-05, "loss": 0.5409, "step": 4306, "task_loss": 0.2527400851249695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6517503261566162, "epoch": 3.64, "learning_rate": 2.399468663204927e-05, "loss": 0.4718, "step": 4307, "task_loss": 0.7020302414894104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3922607898712158, "epoch": 3.64, "learning_rate": 2.3988648713923438e-05, "loss": 0.558, "step": 4308, "task_loss": 0.33792805671691895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2505198121070862, "epoch": 3.64, "learning_rate": 2.3982610795797612e-05, "loss": 0.7499, "step": 4309, "task_loss": 1.4245668649673462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2577147483825684, "epoch": 3.64, "learning_rate": 2.397657287767178e-05, "loss": 0.7844, "step": 4310, "task_loss": 1.540686845779419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9071505665779114, "epoch": 3.64, "learning_rate": 2.397053495954595e-05, "loss": 0.7144, "step": 4311, "task_loss": 0.9646505117416382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8620690107345581, "epoch": 3.64, "learning_rate": 2.396449704142012e-05, "loss": 0.5949, "step": 4312, "task_loss": 0.9074426293373108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44265222549438477, "epoch": 3.65, "learning_rate": 2.3958459123294287e-05, "loss": 0.6603, "step": 4313, "task_loss": 0.4282679557800293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5303137302398682, "epoch": 3.65, "learning_rate": 2.395242120516846e-05, "loss": 0.6798, "step": 4314, "task_loss": 0.6319541931152344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7429466247558594, "epoch": 3.65, "learning_rate": 2.394638328704263e-05, "loss": 0.532, "step": 4315, "task_loss": 1.08628249168396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5437277555465698, "epoch": 3.65, "learning_rate": 2.39403453689168e-05, "loss": 0.6242, "step": 4316, "task_loss": 0.6026431918144226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5413227081298828, "epoch": 3.65, "learning_rate": 2.393430745079097e-05, "loss": 0.5093, "step": 4317, "task_loss": 1.40569269657135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6359764933586121, "epoch": 3.65, "learning_rate": 2.3928269532665137e-05, "loss": 0.5781, "step": 4318, "task_loss": 0.6995397210121155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3550418019294739, "epoch": 3.65, "learning_rate": 2.3922231614539308e-05, "loss": 0.5399, "step": 4319, "task_loss": 0.40978264808654785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.592278003692627, "epoch": 3.65, "learning_rate": 2.3916193696413478e-05, "loss": 0.4934, "step": 4320, "task_loss": 1.0277175903320312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3520861566066742, "epoch": 3.65, "learning_rate": 2.3910155778287645e-05, "loss": 0.5322, "step": 4321, "task_loss": 0.9977183938026428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4137224853038788, "epoch": 3.65, "learning_rate": 2.390411786016182e-05, "loss": 0.6491, "step": 4322, "task_loss": 0.23087714612483978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.709358811378479, "epoch": 3.65, "learning_rate": 2.3898079942035986e-05, "loss": 0.7269, "step": 4323, "task_loss": 1.2134037017822266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4022415578365326, "epoch": 3.65, "learning_rate": 2.3892042023910157e-05, "loss": 0.5052, "step": 4324, "task_loss": 0.5777286291122437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24018356204032898, "epoch": 3.66, "learning_rate": 2.3886004105784328e-05, "loss": 0.6199, "step": 4325, "task_loss": 0.042449068278074265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3976602554321289, "epoch": 3.66, "learning_rate": 2.3879966187658495e-05, "loss": 0.5082, "step": 4326, "task_loss": 0.9932011961936951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42278140783309937, "epoch": 3.66, "learning_rate": 2.3873928269532665e-05, "loss": 0.5369, "step": 4327, "task_loss": 0.45321857929229736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41785335540771484, "epoch": 3.66, "learning_rate": 2.3867890351406836e-05, "loss": 0.5708, "step": 4328, "task_loss": 0.341041624546051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37487316131591797, "epoch": 3.66, "learning_rate": 2.3861852433281006e-05, "loss": 0.4989, "step": 4329, "task_loss": 0.4843595027923584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7759889960289001, "epoch": 3.66, "learning_rate": 2.3855814515155177e-05, "loss": 0.6312, "step": 4330, "task_loss": 0.7479853630065918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5521308183670044, "epoch": 3.66, "learning_rate": 2.3849776597029344e-05, "loss": 0.5694, "step": 4331, "task_loss": 0.15640904009342194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3748498857021332, "epoch": 3.66, "learning_rate": 2.3843738678903515e-05, "loss": 0.3973, "step": 4332, "task_loss": 0.5818940997123718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8559672832489014, "epoch": 3.66, "learning_rate": 2.3837700760777685e-05, "loss": 0.6637, "step": 4333, "task_loss": 0.4340634047985077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5266093611717224, "epoch": 3.66, "learning_rate": 2.3831662842651856e-05, "loss": 0.451, "step": 4334, "task_loss": 1.163358449935913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46888086199760437, "epoch": 3.66, "learning_rate": 2.3825624924526023e-05, "loss": 0.6834, "step": 4335, "task_loss": 0.6721116900444031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3285461962223053, "epoch": 3.66, "learning_rate": 2.3819587006400194e-05, "loss": 0.4003, "step": 4336, "task_loss": 0.1277686357498169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33291923999786377, "epoch": 3.67, "learning_rate": 2.3813549088274364e-05, "loss": 0.7039, "step": 4337, "task_loss": 0.4311257302761078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5239934921264648, "epoch": 3.67, "learning_rate": 2.3807511170148535e-05, "loss": 0.6426, "step": 4338, "task_loss": 0.6429594159126282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5139381885528564, "epoch": 3.67, "learning_rate": 2.3801473252022705e-05, "loss": 0.5274, "step": 4339, "task_loss": 0.24639888107776642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.789879322052002, "epoch": 3.67, "learning_rate": 2.3795435333896873e-05, "loss": 0.6836, "step": 4340, "task_loss": 0.5346865057945251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4358922243118286, "epoch": 3.67, "learning_rate": 2.3789397415771043e-05, "loss": 0.6389, "step": 4341, "task_loss": 0.3873177170753479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3415587544441223, "epoch": 3.67, "learning_rate": 2.3783359497645214e-05, "loss": 0.6568, "step": 4342, "task_loss": 0.32899007201194763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.65337073802948, "epoch": 3.67, "learning_rate": 2.377732157951938e-05, "loss": 0.6593, "step": 4343, "task_loss": 0.5241606831550598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41692328453063965, "epoch": 3.67, "learning_rate": 2.3771283661393555e-05, "loss": 0.5744, "step": 4344, "task_loss": 1.3385401964187622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38485807180404663, "epoch": 3.67, "learning_rate": 2.3765245743267722e-05, "loss": 0.4901, "step": 4345, "task_loss": 0.2238738238811493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5712052583694458, "epoch": 3.67, "learning_rate": 2.375920782514189e-05, "loss": 0.592, "step": 4346, "task_loss": 0.6720612049102783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44998031854629517, "epoch": 3.67, "learning_rate": 2.3753169907016063e-05, "loss": 0.4704, "step": 4347, "task_loss": 0.42162176966667175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.570525050163269, "epoch": 3.67, "learning_rate": 2.374713198889023e-05, "loss": 0.7058, "step": 4348, "task_loss": 0.3019949793815613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9714746475219727, "epoch": 3.68, "learning_rate": 2.37410940707644e-05, "loss": 0.6358, "step": 4349, "task_loss": 1.2269765138626099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.665420651435852, "epoch": 3.68, "learning_rate": 2.373505615263857e-05, "loss": 0.4446, "step": 4350, "task_loss": 0.7301895618438721 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7116296291351318, "epoch": 3.68, "learning_rate": 2.372901823451274e-05, "loss": 0.6218, "step": 4351, "task_loss": 0.6280571818351746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8663284182548523, "epoch": 3.68, "learning_rate": 2.3722980316386913e-05, "loss": 0.7794, "step": 4352, "task_loss": 0.8003362417221069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9489040374755859, "epoch": 3.68, "learning_rate": 2.371694239826108e-05, "loss": 0.8086, "step": 4353, "task_loss": 1.1657168865203857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0347764492034912, "epoch": 3.68, "learning_rate": 2.371090448013525e-05, "loss": 0.72, "step": 4354, "task_loss": 1.3878023624420166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6099624037742615, "epoch": 3.68, "learning_rate": 2.370486656200942e-05, "loss": 0.725, "step": 4355, "task_loss": 1.264418363571167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5760116577148438, "epoch": 3.68, "learning_rate": 2.3698828643883588e-05, "loss": 0.6533, "step": 4356, "task_loss": 1.23392915725708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5859034061431885, "epoch": 3.68, "learning_rate": 2.369279072575776e-05, "loss": 0.7091, "step": 4357, "task_loss": 0.6661136150360107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6494975090026855, "epoch": 3.68, "learning_rate": 2.368675280763193e-05, "loss": 0.4674, "step": 4358, "task_loss": 0.6247580051422119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48702794313430786, "epoch": 3.68, "learning_rate": 2.36807148895061e-05, "loss": 0.4968, "step": 4359, "task_loss": 0.785739004611969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7734709978103638, "epoch": 3.69, "learning_rate": 2.367467697138027e-05, "loss": 0.5379, "step": 4360, "task_loss": 0.4439743161201477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3113316297531128, "epoch": 3.69, "learning_rate": 2.3668639053254438e-05, "loss": 0.493, "step": 4361, "task_loss": 0.6056875586509705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4885111451148987, "epoch": 3.69, "learning_rate": 2.3662601135128608e-05, "loss": 0.5453, "step": 4362, "task_loss": 1.021718978881836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7119765281677246, "epoch": 3.69, "learning_rate": 2.365656321700278e-05, "loss": 0.6081, "step": 4363, "task_loss": 0.9250222444534302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5379183292388916, "epoch": 3.69, "learning_rate": 2.365052529887695e-05, "loss": 0.5123, "step": 4364, "task_loss": 0.29842841625213623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7729349732398987, "epoch": 3.69, "learning_rate": 2.3644487380751117e-05, "loss": 0.6453, "step": 4365, "task_loss": 0.8580528497695923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3468550443649292, "epoch": 3.69, "learning_rate": 2.3638449462625287e-05, "loss": 0.6883, "step": 4366, "task_loss": 0.862120509147644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7004908323287964, "epoch": 3.69, "learning_rate": 2.3632411544499458e-05, "loss": 0.7006, "step": 4367, "task_loss": 0.7497504949569702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5857583284378052, "epoch": 3.69, "learning_rate": 2.3626373626373628e-05, "loss": 0.5364, "step": 4368, "task_loss": 1.0275447368621826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5849742889404297, "epoch": 3.69, "learning_rate": 2.36203357082478e-05, "loss": 0.529, "step": 4369, "task_loss": 0.7624648809432983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4179826080799103, "epoch": 3.69, "learning_rate": 2.3614297790121966e-05, "loss": 0.6487, "step": 4370, "task_loss": 0.5821127891540527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6383635401725769, "epoch": 3.69, "learning_rate": 2.3608259871996137e-05, "loss": 0.6324, "step": 4371, "task_loss": 0.12653309106826782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5964045524597168, "epoch": 3.7, "learning_rate": 2.3602221953870307e-05, "loss": 0.4753, "step": 4372, "task_loss": 1.12154221534729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9079617261886597, "epoch": 3.7, "learning_rate": 2.3596184035744474e-05, "loss": 0.6292, "step": 4373, "task_loss": 0.9361488223075867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7502180337905884, "epoch": 3.7, "learning_rate": 2.3590146117618648e-05, "loss": 0.6016, "step": 4374, "task_loss": 0.49215951561927795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6395629644393921, "epoch": 3.7, "learning_rate": 2.3584108199492815e-05, "loss": 0.6775, "step": 4375, "task_loss": 1.8227969408035278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3942953646183014, "epoch": 3.7, "learning_rate": 2.3578070281366986e-05, "loss": 0.7455, "step": 4376, "task_loss": 0.6180973052978516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6891099810600281, "epoch": 3.7, "learning_rate": 2.3572032363241157e-05, "loss": 0.6206, "step": 4377, "task_loss": 1.15162193775177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6132393479347229, "epoch": 3.7, "learning_rate": 2.3565994445115324e-05, "loss": 0.6412, "step": 4378, "task_loss": 1.0419695377349854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.607231080532074, "epoch": 3.7, "learning_rate": 2.3559956526989498e-05, "loss": 0.6849, "step": 4379, "task_loss": 0.3432684540748596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6909276247024536, "epoch": 3.7, "learning_rate": 2.3553918608863665e-05, "loss": 0.6988, "step": 4380, "task_loss": 0.897251546382904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37630608677864075, "epoch": 3.7, "learning_rate": 2.3547880690737832e-05, "loss": 0.5215, "step": 4381, "task_loss": 0.1891927868127823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3235810399055481, "epoch": 3.7, "learning_rate": 2.3541842772612006e-05, "loss": 0.6792, "step": 4382, "task_loss": 0.13544581830501556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8620448112487793, "epoch": 3.7, "learning_rate": 2.3535804854486173e-05, "loss": 0.7341, "step": 4383, "task_loss": 0.4014430046081543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7867333889007568, "epoch": 3.71, "learning_rate": 2.3529766936360344e-05, "loss": 0.6159, "step": 4384, "task_loss": 0.3357470631599426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6868933439254761, "epoch": 3.71, "learning_rate": 2.3523729018234514e-05, "loss": 0.6366, "step": 4385, "task_loss": 1.1039340496063232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8148773312568665, "epoch": 3.71, "learning_rate": 2.351769110010868e-05, "loss": 0.5945, "step": 4386, "task_loss": 0.4654320180416107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44183096289634705, "epoch": 3.71, "learning_rate": 2.3511653181982856e-05, "loss": 0.5496, "step": 4387, "task_loss": 0.8588679432868958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.666212260723114, "epoch": 3.71, "learning_rate": 2.3505615263857023e-05, "loss": 0.5312, "step": 4388, "task_loss": 0.1847188025712967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9916696548461914, "epoch": 3.71, "learning_rate": 2.3499577345731193e-05, "loss": 0.7238, "step": 4389, "task_loss": 1.6854559183120728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.19853267073631287, "epoch": 3.71, "learning_rate": 2.3493539427605364e-05, "loss": 0.4684, "step": 4390, "task_loss": 0.10029415041208267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3769868016242981, "epoch": 3.71, "learning_rate": 2.348750150947953e-05, "loss": 0.6432, "step": 4391, "task_loss": 0.9148777723312378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4568653106689453, "epoch": 3.71, "learning_rate": 2.34814635913537e-05, "loss": 0.6523, "step": 4392, "task_loss": 1.2455281019210815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45522409677505493, "epoch": 3.71, "learning_rate": 2.3475425673227872e-05, "loss": 0.6375, "step": 4393, "task_loss": 0.24589979648590088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6949620246887207, "epoch": 3.71, "learning_rate": 2.3469387755102043e-05, "loss": 0.6419, "step": 4394, "task_loss": 1.5485948324203491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49712884426116943, "epoch": 3.71, "learning_rate": 2.3463349836976213e-05, "loss": 0.6477, "step": 4395, "task_loss": 0.7489649057388306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3558977246284485, "epoch": 3.72, "learning_rate": 2.345731191885038e-05, "loss": 0.5066, "step": 4396, "task_loss": 0.4252975583076477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.845852255821228, "epoch": 3.72, "learning_rate": 2.345127400072455e-05, "loss": 0.6293, "step": 4397, "task_loss": 1.162513017654419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0114325284957886, "epoch": 3.72, "learning_rate": 2.344523608259872e-05, "loss": 0.6334, "step": 4398, "task_loss": 0.7668378353118896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7894441485404968, "epoch": 3.72, "learning_rate": 2.3439198164472892e-05, "loss": 0.6698, "step": 4399, "task_loss": 1.1614115238189697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5702123045921326, "epoch": 3.72, "learning_rate": 2.343316024634706e-05, "loss": 0.5268, "step": 4400, "task_loss": 0.32320210337638855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7098827362060547, "epoch": 3.72, "learning_rate": 2.342712232822123e-05, "loss": 0.6034, "step": 4401, "task_loss": 1.8859773874282837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3972153961658478, "epoch": 3.72, "learning_rate": 2.34210844100954e-05, "loss": 0.6023, "step": 4402, "task_loss": 0.6027222871780396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3747714161872864, "epoch": 3.72, "learning_rate": 2.341504649196957e-05, "loss": 0.4614, "step": 4403, "task_loss": 0.41247859597206116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4844101071357727, "epoch": 3.72, "learning_rate": 2.3409008573843742e-05, "loss": 0.5485, "step": 4404, "task_loss": 0.36870938539505005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4122719466686249, "epoch": 3.72, "learning_rate": 2.340297065571791e-05, "loss": 0.4344, "step": 4405, "task_loss": 0.09296884387731552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.606812596321106, "epoch": 3.72, "learning_rate": 2.339693273759208e-05, "loss": 0.6583, "step": 4406, "task_loss": 0.9646396636962891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2747921645641327, "epoch": 3.72, "learning_rate": 2.339089481946625e-05, "loss": 0.6408, "step": 4407, "task_loss": 0.2483992576599121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3963993787765503, "epoch": 3.73, "learning_rate": 2.3384856901340417e-05, "loss": 0.644, "step": 4408, "task_loss": 0.4907647371292114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5062950253486633, "epoch": 3.73, "learning_rate": 2.337881898321459e-05, "loss": 0.5322, "step": 4409, "task_loss": 0.9860397577285767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.440014511346817, "epoch": 3.73, "learning_rate": 2.337278106508876e-05, "loss": 0.6859, "step": 4410, "task_loss": 0.6240302920341492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44640499353408813, "epoch": 3.73, "learning_rate": 2.3366743146962926e-05, "loss": 0.7425, "step": 4411, "task_loss": 0.6822888851165771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6823066473007202, "epoch": 3.73, "learning_rate": 2.33607052288371e-05, "loss": 0.5774, "step": 4412, "task_loss": 0.5784011483192444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6787376403808594, "epoch": 3.73, "learning_rate": 2.3354667310711267e-05, "loss": 0.5861, "step": 4413, "task_loss": 1.2307441234588623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5095716714859009, "epoch": 3.73, "learning_rate": 2.3348629392585437e-05, "loss": 0.6124, "step": 4414, "task_loss": 1.086814522743225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0313618183135986, "epoch": 3.73, "learning_rate": 2.3342591474459608e-05, "loss": 0.548, "step": 4415, "task_loss": 0.7202802896499634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1274170875549316, "epoch": 3.73, "learning_rate": 2.3336553556333775e-05, "loss": 0.6686, "step": 4416, "task_loss": 0.7466042637825012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5515289306640625, "epoch": 3.73, "learning_rate": 2.333051563820795e-05, "loss": 0.6705, "step": 4417, "task_loss": 1.4633358716964722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.17373959720134735, "epoch": 3.73, "learning_rate": 2.3324477720082116e-05, "loss": 0.6254, "step": 4418, "task_loss": 0.08294698596000671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39644840359687805, "epoch": 3.73, "learning_rate": 2.3318439801956287e-05, "loss": 0.5264, "step": 4419, "task_loss": 0.48362573981285095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5280262231826782, "epoch": 3.74, "learning_rate": 2.3312401883830457e-05, "loss": 0.5205, "step": 4420, "task_loss": 0.6926913857460022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6857432723045349, "epoch": 3.74, "learning_rate": 2.3306363965704624e-05, "loss": 0.6407, "step": 4421, "task_loss": 0.9331052303314209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35209646821022034, "epoch": 3.74, "learning_rate": 2.3300326047578795e-05, "loss": 0.5675, "step": 4422, "task_loss": 1.197452425956726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43328309059143066, "epoch": 3.74, "learning_rate": 2.3294288129452966e-05, "loss": 0.5529, "step": 4423, "task_loss": 1.5100347995758057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6814221143722534, "epoch": 3.74, "learning_rate": 2.3288250211327136e-05, "loss": 0.5762, "step": 4424, "task_loss": 1.0538145303726196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5875340700149536, "epoch": 3.74, "learning_rate": 2.3282212293201307e-05, "loss": 0.5315, "step": 4425, "task_loss": 1.5315380096435547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5955626964569092, "epoch": 3.74, "learning_rate": 2.3276174375075474e-05, "loss": 0.6559, "step": 4426, "task_loss": 0.336516410112381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5665879249572754, "epoch": 3.74, "learning_rate": 2.3270136456949645e-05, "loss": 0.4703, "step": 4427, "task_loss": 0.6884249448776245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5615999102592468, "epoch": 3.74, "learning_rate": 2.3264098538823815e-05, "loss": 0.6901, "step": 4428, "task_loss": 0.6692479848861694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44379621744155884, "epoch": 3.74, "learning_rate": 2.3258060620697986e-05, "loss": 0.4697, "step": 4429, "task_loss": 0.16171132028102875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7368588447570801, "epoch": 3.74, "learning_rate": 2.3252022702572153e-05, "loss": 0.9206, "step": 4430, "task_loss": 1.284806489944458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8409979939460754, "epoch": 3.75, "learning_rate": 2.3245984784446323e-05, "loss": 0.6714, "step": 4431, "task_loss": 1.345111608505249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6538600325584412, "epoch": 3.75, "learning_rate": 2.3239946866320494e-05, "loss": 0.5493, "step": 4432, "task_loss": 0.9229645133018494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7139259576797485, "epoch": 3.75, "learning_rate": 2.3233908948194665e-05, "loss": 0.7461, "step": 4433, "task_loss": 0.712328314781189 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6338579654693604, "epoch": 3.75, "learning_rate": 2.3227871030068835e-05, "loss": 0.6145, "step": 4434, "task_loss": 0.5481329560279846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6404789686203003, "epoch": 3.75, "learning_rate": 2.3221833111943002e-05, "loss": 0.5579, "step": 4435, "task_loss": 0.3292856514453888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6401785612106323, "epoch": 3.75, "learning_rate": 2.3215795193817173e-05, "loss": 0.6548, "step": 4436, "task_loss": 0.8043035268783569 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2880534529685974, "epoch": 3.75, "learning_rate": 2.3209757275691343e-05, "loss": 0.5071, "step": 4437, "task_loss": 0.33581164479255676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9013627171516418, "epoch": 3.75, "learning_rate": 2.320371935756551e-05, "loss": 0.6583, "step": 4438, "task_loss": 0.6649416089057922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44268637895584106, "epoch": 3.75, "learning_rate": 2.3197681439439685e-05, "loss": 0.5224, "step": 4439, "task_loss": 0.267585426568985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4741356372833252, "epoch": 3.75, "learning_rate": 2.3191643521313852e-05, "loss": 0.5593, "step": 4440, "task_loss": 1.284013032913208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9820587635040283, "epoch": 3.75, "learning_rate": 2.3185605603188022e-05, "loss": 0.615, "step": 4441, "task_loss": 0.6287606954574585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4262923300266266, "epoch": 3.75, "learning_rate": 2.3179567685062193e-05, "loss": 0.5218, "step": 4442, "task_loss": 0.6466645002365112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7400762438774109, "epoch": 3.76, "learning_rate": 2.317352976693636e-05, "loss": 0.607, "step": 4443, "task_loss": 0.6763919591903687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6467039585113525, "epoch": 3.76, "learning_rate": 2.3167491848810534e-05, "loss": 0.6712, "step": 4444, "task_loss": 1.332582712173462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5418936014175415, "epoch": 3.76, "learning_rate": 2.31614539306847e-05, "loss": 0.4331, "step": 4445, "task_loss": 0.9271002411842346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48879605531692505, "epoch": 3.76, "learning_rate": 2.315541601255887e-05, "loss": 0.6039, "step": 4446, "task_loss": 0.2636723816394806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7862344980239868, "epoch": 3.76, "learning_rate": 2.3149378094433042e-05, "loss": 0.6406, "step": 4447, "task_loss": 0.5329148769378662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6804165840148926, "epoch": 3.76, "learning_rate": 2.314334017630721e-05, "loss": 0.6231, "step": 4448, "task_loss": 1.711234211921692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2958729565143585, "epoch": 3.76, "learning_rate": 2.313730225818138e-05, "loss": 0.4861, "step": 4449, "task_loss": 0.005031430162489414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6164088249206543, "epoch": 3.76, "learning_rate": 2.313126434005555e-05, "loss": 0.6242, "step": 4450, "task_loss": 1.404719591140747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.605385422706604, "epoch": 3.76, "learning_rate": 2.3125226421929718e-05, "loss": 0.6962, "step": 4451, "task_loss": 0.793285071849823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.168250560760498, "epoch": 3.76, "learning_rate": 2.3119188503803892e-05, "loss": 0.6267, "step": 4452, "task_loss": 0.5260794758796692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5163881778717041, "epoch": 3.76, "learning_rate": 2.311315058567806e-05, "loss": 0.4575, "step": 4453, "task_loss": 0.3756598234176636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.610989511013031, "epoch": 3.76, "learning_rate": 2.310711266755223e-05, "loss": 0.4606, "step": 4454, "task_loss": 0.7098262906074524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6581740975379944, "epoch": 3.77, "learning_rate": 2.31010747494264e-05, "loss": 0.7261, "step": 4455, "task_loss": 0.23161479830741882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.574173629283905, "epoch": 3.77, "learning_rate": 2.3095036831300567e-05, "loss": 0.7126, "step": 4456, "task_loss": 0.8425107002258301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9820064306259155, "epoch": 3.77, "learning_rate": 2.3088998913174738e-05, "loss": 0.5566, "step": 4457, "task_loss": 0.8681281805038452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42446669936180115, "epoch": 3.77, "learning_rate": 2.308296099504891e-05, "loss": 0.6994, "step": 4458, "task_loss": 0.3902852237224579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31970882415771484, "epoch": 3.77, "learning_rate": 2.307692307692308e-05, "loss": 0.49, "step": 4459, "task_loss": 0.356161504983902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40275195240974426, "epoch": 3.77, "learning_rate": 2.307088515879725e-05, "loss": 0.5205, "step": 4460, "task_loss": 0.6699374914169312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48749005794525146, "epoch": 3.77, "learning_rate": 2.3064847240671417e-05, "loss": 0.5133, "step": 4461, "task_loss": 0.36602315306663513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33166787028312683, "epoch": 3.77, "learning_rate": 2.3058809322545587e-05, "loss": 0.4646, "step": 4462, "task_loss": 0.46980100870132446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4812576174736023, "epoch": 3.77, "learning_rate": 2.3052771404419758e-05, "loss": 0.4984, "step": 4463, "task_loss": 0.46535345911979675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8003339171409607, "epoch": 3.77, "learning_rate": 2.304673348629393e-05, "loss": 0.7401, "step": 4464, "task_loss": 1.0721266269683838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4290904402732849, "epoch": 3.77, "learning_rate": 2.3040695568168096e-05, "loss": 0.5695, "step": 4465, "task_loss": 0.7608657479286194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43186700344085693, "epoch": 3.77, "learning_rate": 2.3034657650042266e-05, "loss": 0.4622, "step": 4466, "task_loss": 0.4474439024925232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4712008833885193, "epoch": 3.78, "learning_rate": 2.3028619731916437e-05, "loss": 0.4767, "step": 4467, "task_loss": 0.5881128907203674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5413079857826233, "epoch": 3.78, "learning_rate": 2.3022581813790607e-05, "loss": 0.6125, "step": 4468, "task_loss": 0.4645686149597168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0193965435028076, "epoch": 3.78, "learning_rate": 2.3016543895664778e-05, "loss": 0.7346, "step": 4469, "task_loss": 2.3894333839416504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3247479200363159, "epoch": 3.78, "learning_rate": 2.3010505977538945e-05, "loss": 0.3696, "step": 4470, "task_loss": 0.05943428352475166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44149720668792725, "epoch": 3.78, "learning_rate": 2.3004468059413116e-05, "loss": 0.5811, "step": 4471, "task_loss": 0.41300803422927856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6600164771080017, "epoch": 3.78, "learning_rate": 2.2998430141287286e-05, "loss": 0.5751, "step": 4472, "task_loss": 0.46976733207702637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33291852474212646, "epoch": 3.78, "learning_rate": 2.2992392223161454e-05, "loss": 0.4932, "step": 4473, "task_loss": 0.6892144680023193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.57462477684021, "epoch": 3.78, "learning_rate": 2.2986354305035627e-05, "loss": 0.5333, "step": 4474, "task_loss": 0.29683396220207214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7026301622390747, "epoch": 3.78, "learning_rate": 2.2980316386909795e-05, "loss": 0.7137, "step": 4475, "task_loss": 0.04255634546279907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.20254918932914734, "epoch": 3.78, "learning_rate": 2.2974278468783962e-05, "loss": 0.5424, "step": 4476, "task_loss": 0.524287223815918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7224549055099487, "epoch": 3.78, "learning_rate": 2.2968240550658136e-05, "loss": 0.5321, "step": 4477, "task_loss": 0.7353062629699707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48610368371009827, "epoch": 3.78, "learning_rate": 2.2962202632532303e-05, "loss": 0.5873, "step": 4478, "task_loss": 1.062229871749878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46096593141555786, "epoch": 3.79, "learning_rate": 2.2956164714406474e-05, "loss": 0.4934, "step": 4479, "task_loss": 0.41122597455978394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4773593544960022, "epoch": 3.79, "learning_rate": 2.2950126796280644e-05, "loss": 0.4989, "step": 4480, "task_loss": 0.08708696067333221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6095165014266968, "epoch": 3.79, "learning_rate": 2.294408887815481e-05, "loss": 0.5877, "step": 4481, "task_loss": 0.6403176188468933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4502122700214386, "epoch": 3.79, "learning_rate": 2.2938050960028985e-05, "loss": 0.582, "step": 4482, "task_loss": 0.9707988500595093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38119742274284363, "epoch": 3.79, "learning_rate": 2.2932013041903152e-05, "loss": 0.4812, "step": 4483, "task_loss": 0.16779783368110657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4454629421234131, "epoch": 3.79, "learning_rate": 2.2925975123777323e-05, "loss": 0.5661, "step": 4484, "task_loss": 1.1619364023208618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5072379112243652, "epoch": 3.79, "learning_rate": 2.2919937205651494e-05, "loss": 0.5655, "step": 4485, "task_loss": 0.7217161655426025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6898370981216431, "epoch": 3.79, "learning_rate": 2.291389928752566e-05, "loss": 0.528, "step": 4486, "task_loss": 1.4738593101501465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7601476311683655, "epoch": 3.79, "learning_rate": 2.290786136939983e-05, "loss": 0.5276, "step": 4487, "task_loss": 1.811610460281372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6716529130935669, "epoch": 3.79, "learning_rate": 2.2901823451274002e-05, "loss": 0.6035, "step": 4488, "task_loss": 1.3733972311019897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4354214668273926, "epoch": 3.79, "learning_rate": 2.2895785533148172e-05, "loss": 0.4646, "step": 4489, "task_loss": 0.6362360715866089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4062591791152954, "epoch": 3.79, "learning_rate": 2.2889747615022343e-05, "loss": 0.445, "step": 4490, "task_loss": 1.172103762626648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4040986895561218, "epoch": 3.8, "learning_rate": 2.288370969689651e-05, "loss": 0.5384, "step": 4491, "task_loss": 0.6108687520027161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4843246638774872, "epoch": 3.8, "learning_rate": 2.287767177877068e-05, "loss": 0.6359, "step": 4492, "task_loss": 0.8827519416809082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6765078902244568, "epoch": 3.8, "learning_rate": 2.287163386064485e-05, "loss": 0.6901, "step": 4493, "task_loss": 1.3149833679199219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7701029777526855, "epoch": 3.8, "learning_rate": 2.286559594251902e-05, "loss": 0.5815, "step": 4494, "task_loss": 0.8802153468132019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6770055890083313, "epoch": 3.8, "learning_rate": 2.285955802439319e-05, "loss": 0.468, "step": 4495, "task_loss": 0.7696042656898499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8490389585494995, "epoch": 3.8, "learning_rate": 2.285352010626736e-05, "loss": 0.6947, "step": 4496, "task_loss": 1.1617990732192993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5762185454368591, "epoch": 3.8, "learning_rate": 2.284748218814153e-05, "loss": 0.5932, "step": 4497, "task_loss": 0.6689549088478088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7190088629722595, "epoch": 3.8, "learning_rate": 2.28414442700157e-05, "loss": 0.5068, "step": 4498, "task_loss": 0.418878436088562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.258762001991272, "epoch": 3.8, "learning_rate": 2.2835406351889868e-05, "loss": 0.532, "step": 4499, "task_loss": 0.11511826515197754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27852538228034973, "epoch": 3.8, "learning_rate": 2.282936843376404e-05, "loss": 0.6075, "step": 4500, "task_loss": 1.213998556137085 }, { "epoch": 3.8, "eval_accuracy": 0.9011485148514852, "eval_loss": 0.3505952060222626, "eval_runtime": 227.5401, "eval_samples_per_second": 110.969, "eval_steps_per_second": 0.87, "step": 4500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8224712014198303, "epoch": 3.8, "learning_rate": 2.282333051563821e-05, "loss": 0.6948, "step": 4501, "task_loss": 0.3107118308544159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37828683853149414, "epoch": 3.81, "learning_rate": 2.281729259751238e-05, "loss": 0.5334, "step": 4502, "task_loss": 0.12263364344835281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45930367708206177, "epoch": 3.81, "learning_rate": 2.2811254679386547e-05, "loss": 0.4597, "step": 4503, "task_loss": 0.5213666558265686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3877778649330139, "epoch": 3.81, "learning_rate": 2.2805216761260718e-05, "loss": 0.5537, "step": 4504, "task_loss": 0.23105968534946442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5131558179855347, "epoch": 3.81, "learning_rate": 2.2799178843134888e-05, "loss": 0.5952, "step": 4505, "task_loss": 0.6777783036231995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7000924348831177, "epoch": 3.81, "learning_rate": 2.279314092500906e-05, "loss": 0.5613, "step": 4506, "task_loss": 1.1955558061599731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6547091603279114, "epoch": 3.81, "learning_rate": 2.278710300688323e-05, "loss": 0.4627, "step": 4507, "task_loss": 1.5050053596496582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27803266048431396, "epoch": 3.81, "learning_rate": 2.2781065088757396e-05, "loss": 0.4563, "step": 4508, "task_loss": 0.48284903168678284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8124231100082397, "epoch": 3.81, "learning_rate": 2.2775027170631567e-05, "loss": 0.589, "step": 4509, "task_loss": 0.3941148817539215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.221726417541504, "epoch": 3.81, "learning_rate": 2.2768989252505738e-05, "loss": 0.7193, "step": 4510, "task_loss": 1.5887213945388794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7639302015304565, "epoch": 3.81, "learning_rate": 2.2762951334379905e-05, "loss": 0.6239, "step": 4511, "task_loss": 0.7167043685913086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9234924912452698, "epoch": 3.81, "learning_rate": 2.275691341625408e-05, "loss": 0.6194, "step": 4512, "task_loss": 1.0194989442825317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4466555416584015, "epoch": 3.81, "learning_rate": 2.2750875498128246e-05, "loss": 0.4811, "step": 4513, "task_loss": 0.60932457447052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40963804721832275, "epoch": 3.82, "learning_rate": 2.2744837580002416e-05, "loss": 0.5943, "step": 4514, "task_loss": 0.33377841114997864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39531293511390686, "epoch": 3.82, "learning_rate": 2.2738799661876587e-05, "loss": 0.3894, "step": 4515, "task_loss": 0.9314152598381042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3533097505569458, "epoch": 3.82, "learning_rate": 2.2732761743750754e-05, "loss": 0.4136, "step": 4516, "task_loss": 0.23366022109985352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42140284180641174, "epoch": 3.82, "learning_rate": 2.2726723825624928e-05, "loss": 0.6782, "step": 4517, "task_loss": 1.0073362588882446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4779048562049866, "epoch": 3.82, "learning_rate": 2.2720685907499095e-05, "loss": 0.5234, "step": 4518, "task_loss": 0.7014445662498474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4349392354488373, "epoch": 3.82, "learning_rate": 2.2714647989373263e-05, "loss": 0.4053, "step": 4519, "task_loss": 0.6497784852981567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41986557841300964, "epoch": 3.82, "learning_rate": 2.2708610071247436e-05, "loss": 0.4378, "step": 4520, "task_loss": 0.30898967385292053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44661352038383484, "epoch": 3.82, "learning_rate": 2.2702572153121604e-05, "loss": 0.52, "step": 4521, "task_loss": 1.5813034772872925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5483782291412354, "epoch": 3.82, "learning_rate": 2.2696534234995774e-05, "loss": 0.5193, "step": 4522, "task_loss": 0.13440847396850586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6100765466690063, "epoch": 3.82, "learning_rate": 2.2690496316869945e-05, "loss": 0.6274, "step": 4523, "task_loss": 0.2875552177429199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5049988627433777, "epoch": 3.82, "learning_rate": 2.2684458398744112e-05, "loss": 0.5665, "step": 4524, "task_loss": 0.8494818806648254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8322737216949463, "epoch": 3.82, "learning_rate": 2.2678420480618286e-05, "loss": 0.6605, "step": 4525, "task_loss": 0.4764504134654999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4672878086566925, "epoch": 3.83, "learning_rate": 2.2672382562492453e-05, "loss": 0.5977, "step": 4526, "task_loss": 0.8541036248207092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6046249866485596, "epoch": 3.83, "learning_rate": 2.2666344644366624e-05, "loss": 0.5542, "step": 4527, "task_loss": 0.781278133392334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4827496409416199, "epoch": 3.83, "learning_rate": 2.2660306726240794e-05, "loss": 0.5586, "step": 4528, "task_loss": 0.9666576981544495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7295994758605957, "epoch": 3.83, "learning_rate": 2.265426880811496e-05, "loss": 0.5453, "step": 4529, "task_loss": 0.2307213395833969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7430315613746643, "epoch": 3.83, "learning_rate": 2.2648230889989132e-05, "loss": 0.6353, "step": 4530, "task_loss": 0.7285252809524536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6091195344924927, "epoch": 3.83, "learning_rate": 2.2642192971863303e-05, "loss": 0.6816, "step": 4531, "task_loss": 1.1465976238250732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5833025574684143, "epoch": 3.83, "learning_rate": 2.2636155053737473e-05, "loss": 0.6652, "step": 4532, "task_loss": 0.520223081111908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9356515407562256, "epoch": 3.83, "learning_rate": 2.263011713561164e-05, "loss": 0.6789, "step": 4533, "task_loss": 0.8323215246200562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5891568660736084, "epoch": 3.83, "learning_rate": 2.262407921748581e-05, "loss": 0.4938, "step": 4534, "task_loss": 0.3939654529094696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8318091034889221, "epoch": 3.83, "learning_rate": 2.261804129935998e-05, "loss": 0.723, "step": 4535, "task_loss": 2.1684114933013916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4679873585700989, "epoch": 3.83, "learning_rate": 2.2612003381234152e-05, "loss": 0.5138, "step": 4536, "task_loss": 0.41602635383605957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5540313720703125, "epoch": 3.83, "learning_rate": 2.2605965463108323e-05, "loss": 0.5463, "step": 4537, "task_loss": 0.6549892425537109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42273813486099243, "epoch": 3.84, "learning_rate": 2.259992754498249e-05, "loss": 0.5119, "step": 4538, "task_loss": 0.43286681175231934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4999549984931946, "epoch": 3.84, "learning_rate": 2.259388962685666e-05, "loss": 0.5005, "step": 4539, "task_loss": 1.000728726387024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7849093079566956, "epoch": 3.84, "learning_rate": 2.258785170873083e-05, "loss": 0.788, "step": 4540, "task_loss": 0.9882803559303284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46597665548324585, "epoch": 3.84, "learning_rate": 2.2581813790604998e-05, "loss": 0.545, "step": 4541, "task_loss": 0.5382664799690247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5338019132614136, "epoch": 3.84, "learning_rate": 2.2575775872479172e-05, "loss": 0.5392, "step": 4542, "task_loss": 0.30885669589042664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7065733075141907, "epoch": 3.84, "learning_rate": 2.256973795435334e-05, "loss": 0.6024, "step": 4543, "task_loss": 1.2602601051330566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5769373178482056, "epoch": 3.84, "learning_rate": 2.256370003622751e-05, "loss": 0.6799, "step": 4544, "task_loss": 0.5817105770111084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5890848636627197, "epoch": 3.84, "learning_rate": 2.255766211810168e-05, "loss": 0.6889, "step": 4545, "task_loss": 0.142110213637352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.527262806892395, "epoch": 3.84, "learning_rate": 2.2551624199975848e-05, "loss": 0.5109, "step": 4546, "task_loss": 1.1303462982177734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.530597984790802, "epoch": 3.84, "learning_rate": 2.254558628185002e-05, "loss": 0.5752, "step": 4547, "task_loss": 0.8478494882583618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5579608082771301, "epoch": 3.84, "learning_rate": 2.253954836372419e-05, "loss": 0.5511, "step": 4548, "task_loss": 1.1049139499664307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4899434745311737, "epoch": 3.84, "learning_rate": 2.2533510445598356e-05, "loss": 0.57, "step": 4549, "task_loss": 0.872226357460022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6326770782470703, "epoch": 3.85, "learning_rate": 2.252747252747253e-05, "loss": 0.663, "step": 4550, "task_loss": 0.5758929252624512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3767385482788086, "epoch": 3.85, "learning_rate": 2.2521434609346697e-05, "loss": 0.554, "step": 4551, "task_loss": 0.6987469792366028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.536481499671936, "epoch": 3.85, "learning_rate": 2.2515396691220868e-05, "loss": 0.5169, "step": 4552, "task_loss": 0.5736947655677795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4679645597934723, "epoch": 3.85, "learning_rate": 2.2509358773095038e-05, "loss": 0.6493, "step": 4553, "task_loss": 1.4364482164382935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5526278018951416, "epoch": 3.85, "learning_rate": 2.2503320854969205e-05, "loss": 0.4566, "step": 4554, "task_loss": 0.7572150826454163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6572176218032837, "epoch": 3.85, "learning_rate": 2.249728293684338e-05, "loss": 0.6603, "step": 4555, "task_loss": 1.5873498916625977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5072602033615112, "epoch": 3.85, "learning_rate": 2.2491245018717547e-05, "loss": 0.5489, "step": 4556, "task_loss": 0.3946949243545532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5887047052383423, "epoch": 3.85, "learning_rate": 2.2485207100591717e-05, "loss": 0.4857, "step": 4557, "task_loss": 0.32903701066970825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6570155620574951, "epoch": 3.85, "learning_rate": 2.2479169182465888e-05, "loss": 0.6031, "step": 4558, "task_loss": 0.5098884105682373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46782779693603516, "epoch": 3.85, "learning_rate": 2.2473131264340055e-05, "loss": 0.5025, "step": 4559, "task_loss": 0.6768575310707092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6555294990539551, "epoch": 3.85, "learning_rate": 2.2467093346214225e-05, "loss": 0.638, "step": 4560, "task_loss": 1.189415454864502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3998187780380249, "epoch": 3.85, "learning_rate": 2.2461055428088396e-05, "loss": 0.4107, "step": 4561, "task_loss": 0.3535372316837311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7637245655059814, "epoch": 3.86, "learning_rate": 2.2455017509962567e-05, "loss": 0.557, "step": 4562, "task_loss": 0.30867066979408264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41419517993927, "epoch": 3.86, "learning_rate": 2.2448979591836737e-05, "loss": 0.6637, "step": 4563, "task_loss": 0.5748609304428101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3557615280151367, "epoch": 3.86, "learning_rate": 2.2442941673710904e-05, "loss": 0.4002, "step": 4564, "task_loss": 0.19731488823890686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5281579494476318, "epoch": 3.86, "learning_rate": 2.2436903755585075e-05, "loss": 0.4938, "step": 4565, "task_loss": 0.12420077621936798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3770504593849182, "epoch": 3.86, "learning_rate": 2.2430865837459245e-05, "loss": 0.5466, "step": 4566, "task_loss": 0.49097180366516113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.596808910369873, "epoch": 3.86, "learning_rate": 2.2424827919333416e-05, "loss": 0.527, "step": 4567, "task_loss": 0.3285331726074219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6039056777954102, "epoch": 3.86, "learning_rate": 2.2418790001207583e-05, "loss": 0.597, "step": 4568, "task_loss": 0.8137860298156738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23368346691131592, "epoch": 3.86, "learning_rate": 2.2412752083081754e-05, "loss": 0.5114, "step": 4569, "task_loss": 0.0795210674405098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5441118478775024, "epoch": 3.86, "learning_rate": 2.2406714164955924e-05, "loss": 0.629, "step": 4570, "task_loss": 0.772313117980957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4644792973995209, "epoch": 3.86, "learning_rate": 2.2400676246830095e-05, "loss": 0.5058, "step": 4571, "task_loss": 0.9556580781936646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2741735875606537, "epoch": 3.86, "learning_rate": 2.2394638328704266e-05, "loss": 0.6365, "step": 4572, "task_loss": 0.32410821318626404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.804934561252594, "epoch": 3.87, "learning_rate": 2.2388600410578433e-05, "loss": 0.5288, "step": 4573, "task_loss": 0.8876804113388062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4231383800506592, "epoch": 3.87, "learning_rate": 2.2382562492452603e-05, "loss": 0.6129, "step": 4574, "task_loss": 0.540960967540741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43521153926849365, "epoch": 3.87, "learning_rate": 2.2376524574326774e-05, "loss": 0.5655, "step": 4575, "task_loss": 0.44945383071899414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23040595650672913, "epoch": 3.87, "learning_rate": 2.237048665620094e-05, "loss": 0.4342, "step": 4576, "task_loss": 0.4913996458053589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.4340713024139404, "epoch": 3.87, "learning_rate": 2.2364448738075115e-05, "loss": 0.7879, "step": 4577, "task_loss": 1.3690781593322754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5097252726554871, "epoch": 3.87, "learning_rate": 2.2358410819949282e-05, "loss": 0.487, "step": 4578, "task_loss": 0.5746940970420837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5900717973709106, "epoch": 3.87, "learning_rate": 2.2352372901823453e-05, "loss": 0.5991, "step": 4579, "task_loss": 1.0881357192993164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3439572751522064, "epoch": 3.87, "learning_rate": 2.2346334983697623e-05, "loss": 0.5441, "step": 4580, "task_loss": 0.6399039030075073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6858565807342529, "epoch": 3.87, "learning_rate": 2.234029706557179e-05, "loss": 0.5342, "step": 4581, "task_loss": 0.2370939403772354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45418429374694824, "epoch": 3.87, "learning_rate": 2.2334259147445964e-05, "loss": 0.5826, "step": 4582, "task_loss": 0.1713087111711502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6021031737327576, "epoch": 3.87, "learning_rate": 2.232822122932013e-05, "loss": 0.4871, "step": 4583, "task_loss": 0.502464234828949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9733784198760986, "epoch": 3.87, "learning_rate": 2.23221833111943e-05, "loss": 0.642, "step": 4584, "task_loss": 0.7405000329017639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46803581714630127, "epoch": 3.88, "learning_rate": 2.2316145393068473e-05, "loss": 0.5062, "step": 4585, "task_loss": 1.227565884590149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8267889022827148, "epoch": 3.88, "learning_rate": 2.231010747494264e-05, "loss": 0.6598, "step": 4586, "task_loss": 0.40738967061042786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38898128271102905, "epoch": 3.88, "learning_rate": 2.230406955681681e-05, "loss": 0.5098, "step": 4587, "task_loss": 0.023207519203424454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4987633526325226, "epoch": 3.88, "learning_rate": 2.229803163869098e-05, "loss": 0.4669, "step": 4588, "task_loss": 0.48157164454460144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.612311840057373, "epoch": 3.88, "learning_rate": 2.2291993720565148e-05, "loss": 0.5674, "step": 4589, "task_loss": 1.6587328910827637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9272332191467285, "epoch": 3.88, "learning_rate": 2.2285955802439322e-05, "loss": 0.7854, "step": 4590, "task_loss": 1.8588898181915283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5013290643692017, "epoch": 3.88, "learning_rate": 2.227991788431349e-05, "loss": 0.6576, "step": 4591, "task_loss": 1.0973527431488037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.641532301902771, "epoch": 3.88, "learning_rate": 2.227387996618766e-05, "loss": 0.5807, "step": 4592, "task_loss": 1.6154718399047852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7165365815162659, "epoch": 3.88, "learning_rate": 2.226784204806183e-05, "loss": 0.5641, "step": 4593, "task_loss": 1.4866337776184082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7219924330711365, "epoch": 3.88, "learning_rate": 2.2261804129935998e-05, "loss": 0.6215, "step": 4594, "task_loss": 1.0490946769714355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4483376145362854, "epoch": 3.88, "learning_rate": 2.225576621181017e-05, "loss": 0.6278, "step": 4595, "task_loss": 0.782343864440918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6821736693382263, "epoch": 3.88, "learning_rate": 2.224972829368434e-05, "loss": 0.7046, "step": 4596, "task_loss": 1.5550223588943481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0510276556015015, "epoch": 3.89, "learning_rate": 2.224369037555851e-05, "loss": 0.5766, "step": 4597, "task_loss": 0.7875450253486633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34397757053375244, "epoch": 3.89, "learning_rate": 2.2237652457432677e-05, "loss": 0.4682, "step": 4598, "task_loss": 0.11856172233819962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7001373767852783, "epoch": 3.89, "learning_rate": 2.2231614539306847e-05, "loss": 0.4381, "step": 4599, "task_loss": 0.4853045344352722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37415122985839844, "epoch": 3.89, "learning_rate": 2.2225576621181018e-05, "loss": 0.4368, "step": 4600, "task_loss": 1.010152816772461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5745044350624084, "epoch": 3.89, "learning_rate": 2.221953870305519e-05, "loss": 0.5285, "step": 4601, "task_loss": 0.4524754285812378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3566026985645294, "epoch": 3.89, "learning_rate": 2.221350078492936e-05, "loss": 0.5408, "step": 4602, "task_loss": 0.08953599631786346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4229121208190918, "epoch": 3.89, "learning_rate": 2.2207462866803526e-05, "loss": 0.7673, "step": 4603, "task_loss": 0.07930503040552139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7159371376037598, "epoch": 3.89, "learning_rate": 2.2201424948677697e-05, "loss": 0.6315, "step": 4604, "task_loss": 1.2674273252487183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.571043848991394, "epoch": 3.89, "learning_rate": 2.2195387030551867e-05, "loss": 0.583, "step": 4605, "task_loss": 1.2315365076065063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5418980717658997, "epoch": 3.89, "learning_rate": 2.2189349112426034e-05, "loss": 0.7563, "step": 4606, "task_loss": 0.5784525275230408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4133026897907257, "epoch": 3.89, "learning_rate": 2.218331119430021e-05, "loss": 0.4251, "step": 4607, "task_loss": 0.3497888147830963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3941519558429718, "epoch": 3.89, "learning_rate": 2.2177273276174376e-05, "loss": 0.5483, "step": 4608, "task_loss": 0.36794236302375793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4497247338294983, "epoch": 3.9, "learning_rate": 2.2171235358048546e-05, "loss": 0.5625, "step": 4609, "task_loss": 0.5561099052429199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4860917329788208, "epoch": 3.9, "learning_rate": 2.2165197439922717e-05, "loss": 0.6227, "step": 4610, "task_loss": 0.8884359002113342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9359780550003052, "epoch": 3.9, "learning_rate": 2.2159159521796884e-05, "loss": 0.6513, "step": 4611, "task_loss": 0.9636844396591187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8478249311447144, "epoch": 3.9, "learning_rate": 2.2153121603671058e-05, "loss": 0.689, "step": 4612, "task_loss": 0.7148102521896362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41006070375442505, "epoch": 3.9, "learning_rate": 2.2147083685545225e-05, "loss": 0.4983, "step": 4613, "task_loss": 0.46146488189697266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2889275848865509, "epoch": 3.9, "learning_rate": 2.2141045767419392e-05, "loss": 0.6655, "step": 4614, "task_loss": 0.41868874430656433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5516269207000732, "epoch": 3.9, "learning_rate": 2.2135007849293566e-05, "loss": 0.4877, "step": 4615, "task_loss": 0.5106433629989624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5226693749427795, "epoch": 3.9, "learning_rate": 2.2128969931167733e-05, "loss": 0.661, "step": 4616, "task_loss": 0.321419358253479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45152196288108826, "epoch": 3.9, "learning_rate": 2.2122932013041904e-05, "loss": 0.5746, "step": 4617, "task_loss": 1.1926885843276978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4267456531524658, "epoch": 3.9, "learning_rate": 2.2116894094916075e-05, "loss": 0.6823, "step": 4618, "task_loss": 0.8775150775909424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4764678478240967, "epoch": 3.9, "learning_rate": 2.2110856176790242e-05, "loss": 0.7081, "step": 4619, "task_loss": 0.9472035765647888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5739539861679077, "epoch": 3.9, "learning_rate": 2.2104818258664416e-05, "loss": 0.6143, "step": 4620, "task_loss": 1.3189697265625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5950912237167358, "epoch": 3.91, "learning_rate": 2.2098780340538583e-05, "loss": 0.7556, "step": 4621, "task_loss": 0.34443068504333496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.843063473701477, "epoch": 3.91, "learning_rate": 2.2092742422412753e-05, "loss": 0.6551, "step": 4622, "task_loss": 1.0514072179794312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6285462975502014, "epoch": 3.91, "learning_rate": 2.2086704504286924e-05, "loss": 0.7954, "step": 4623, "task_loss": 1.5145877599716187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9002059698104858, "epoch": 3.91, "learning_rate": 2.208066658616109e-05, "loss": 0.6222, "step": 4624, "task_loss": 0.5354745388031006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6272610425949097, "epoch": 3.91, "learning_rate": 2.2074628668035262e-05, "loss": 0.6092, "step": 4625, "task_loss": 1.33392333984375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47450876235961914, "epoch": 3.91, "learning_rate": 2.2068590749909432e-05, "loss": 0.5909, "step": 4626, "task_loss": 0.6318022012710571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7187767028808594, "epoch": 3.91, "learning_rate": 2.2062552831783603e-05, "loss": 0.632, "step": 4627, "task_loss": 0.6666658520698547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35812804102897644, "epoch": 3.91, "learning_rate": 2.2056514913657773e-05, "loss": 0.4955, "step": 4628, "task_loss": 1.4737628698349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.695568859577179, "epoch": 3.91, "learning_rate": 2.205047699553194e-05, "loss": 0.6705, "step": 4629, "task_loss": 1.3606468439102173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8018062114715576, "epoch": 3.91, "learning_rate": 2.204443907740611e-05, "loss": 0.5904, "step": 4630, "task_loss": 0.9940068125724792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39298880100250244, "epoch": 3.91, "learning_rate": 2.2038401159280282e-05, "loss": 0.4934, "step": 4631, "task_loss": 0.48967358469963074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6605417728424072, "epoch": 3.91, "learning_rate": 2.2032363241154452e-05, "loss": 0.4987, "step": 4632, "task_loss": 1.1239429712295532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5846738815307617, "epoch": 3.92, "learning_rate": 2.202632532302862e-05, "loss": 0.6379, "step": 4633, "task_loss": 0.9471979737281799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28248295187950134, "epoch": 3.92, "learning_rate": 2.202028740490279e-05, "loss": 0.5512, "step": 4634, "task_loss": 0.10429779440164566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5506902933120728, "epoch": 3.92, "learning_rate": 2.201424948677696e-05, "loss": 0.6641, "step": 4635, "task_loss": 0.960847020149231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9819222688674927, "epoch": 3.92, "learning_rate": 2.200821156865113e-05, "loss": 0.7708, "step": 4636, "task_loss": 0.5021255612373352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7932045459747314, "epoch": 3.92, "learning_rate": 2.2002173650525302e-05, "loss": 0.6882, "step": 4637, "task_loss": 1.3558275699615479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45658987760543823, "epoch": 3.92, "learning_rate": 2.199613573239947e-05, "loss": 0.5658, "step": 4638, "task_loss": 0.4359578490257263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2630814015865326, "epoch": 3.92, "learning_rate": 2.199009781427364e-05, "loss": 0.4593, "step": 4639, "task_loss": 0.04308855161070824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2529233396053314, "epoch": 3.92, "learning_rate": 2.198405989614781e-05, "loss": 0.3448, "step": 4640, "task_loss": 0.6680459380149841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4133487939834595, "epoch": 3.92, "learning_rate": 2.1978021978021977e-05, "loss": 0.5052, "step": 4641, "task_loss": 0.19064028561115265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3400127589702606, "epoch": 3.92, "learning_rate": 2.197198405989615e-05, "loss": 0.5259, "step": 4642, "task_loss": 0.050473976880311966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30520859360694885, "epoch": 3.92, "learning_rate": 2.196594614177032e-05, "loss": 0.713, "step": 4643, "task_loss": 0.6662929058074951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7541505098342896, "epoch": 3.93, "learning_rate": 2.195990822364449e-05, "loss": 0.4854, "step": 4644, "task_loss": 0.4065546691417694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8105065226554871, "epoch": 3.93, "learning_rate": 2.195387030551866e-05, "loss": 0.6499, "step": 4645, "task_loss": 0.3908928632736206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41057074069976807, "epoch": 3.93, "learning_rate": 2.1947832387392827e-05, "loss": 0.6082, "step": 4646, "task_loss": 0.2025093138217926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6684619188308716, "epoch": 3.93, "learning_rate": 2.1941794469267e-05, "loss": 0.6339, "step": 4647, "task_loss": 1.867443323135376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5535681247711182, "epoch": 3.93, "learning_rate": 2.1935756551141168e-05, "loss": 0.6595, "step": 4648, "task_loss": 0.9673780798912048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5286191701889038, "epoch": 3.93, "learning_rate": 2.1929718633015335e-05, "loss": 0.5135, "step": 4649, "task_loss": 0.16273073852062225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6012125015258789, "epoch": 3.93, "learning_rate": 2.192368071488951e-05, "loss": 0.5812, "step": 4650, "task_loss": 0.31297779083251953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41989293694496155, "epoch": 3.93, "learning_rate": 2.1917642796763676e-05, "loss": 0.5901, "step": 4651, "task_loss": 1.8514149188995361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7118070125579834, "epoch": 3.93, "learning_rate": 2.1911604878637847e-05, "loss": 0.5651, "step": 4652, "task_loss": 0.3118521273136139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46281954646110535, "epoch": 3.93, "learning_rate": 2.1905566960512017e-05, "loss": 0.5534, "step": 4653, "task_loss": 0.9512856006622314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8008638024330139, "epoch": 3.93, "learning_rate": 2.1899529042386185e-05, "loss": 0.7491, "step": 4654, "task_loss": 0.4393361210823059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6376082301139832, "epoch": 3.93, "learning_rate": 2.189349112426036e-05, "loss": 0.6085, "step": 4655, "task_loss": 0.510701060295105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7970024943351746, "epoch": 3.94, "learning_rate": 2.1887453206134526e-05, "loss": 0.6141, "step": 4656, "task_loss": 0.9412413239479065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49286746978759766, "epoch": 3.94, "learning_rate": 2.1881415288008696e-05, "loss": 0.5088, "step": 4657, "task_loss": 0.2836083471775055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5873324871063232, "epoch": 3.94, "learning_rate": 2.1875377369882867e-05, "loss": 0.4445, "step": 4658, "task_loss": 1.1917039155960083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8191130757331848, "epoch": 3.94, "learning_rate": 2.1869339451757034e-05, "loss": 0.5338, "step": 4659, "task_loss": 0.5051819086074829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2859245240688324, "epoch": 3.94, "learning_rate": 2.1863301533631205e-05, "loss": 0.6807, "step": 4660, "task_loss": 1.2019400596618652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36683204770088196, "epoch": 3.94, "learning_rate": 2.1857263615505375e-05, "loss": 0.5412, "step": 4661, "task_loss": 1.4809026718139648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4598342776298523, "epoch": 3.94, "learning_rate": 2.1851225697379546e-05, "loss": 0.5232, "step": 4662, "task_loss": 0.1396491676568985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6264371871948242, "epoch": 3.94, "learning_rate": 2.1845187779253713e-05, "loss": 0.7501, "step": 4663, "task_loss": 0.33619552850723267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5229457020759583, "epoch": 3.94, "learning_rate": 2.1839149861127884e-05, "loss": 0.5529, "step": 4664, "task_loss": 0.26591020822525024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4617083668708801, "epoch": 3.94, "learning_rate": 2.1833111943002054e-05, "loss": 0.4537, "step": 4665, "task_loss": 0.9166950583457947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.623773992061615, "epoch": 3.94, "learning_rate": 2.1827074024876225e-05, "loss": 0.5949, "step": 4666, "task_loss": 0.3137950897216797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6860369443893433, "epoch": 3.94, "learning_rate": 2.1821036106750395e-05, "loss": 0.736, "step": 4667, "task_loss": 1.0741709470748901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4295746386051178, "epoch": 3.95, "learning_rate": 2.1814998188624562e-05, "loss": 0.578, "step": 4668, "task_loss": 0.8879985809326172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4842042326927185, "epoch": 3.95, "learning_rate": 2.1808960270498733e-05, "loss": 0.7262, "step": 4669, "task_loss": 1.3817460536956787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3290368914604187, "epoch": 3.95, "learning_rate": 2.1802922352372904e-05, "loss": 0.4436, "step": 4670, "task_loss": 0.3827207684516907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6237121224403381, "epoch": 3.95, "learning_rate": 2.179688443424707e-05, "loss": 0.4726, "step": 4671, "task_loss": 0.996856153011322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2989993691444397, "epoch": 3.95, "learning_rate": 2.179084651612124e-05, "loss": 0.5603, "step": 4672, "task_loss": 0.8256833553314209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5866856575012207, "epoch": 3.95, "learning_rate": 2.1784808597995412e-05, "loss": 0.6637, "step": 4673, "task_loss": 0.6276038289070129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7544771432876587, "epoch": 3.95, "learning_rate": 2.1778770679869582e-05, "loss": 0.6367, "step": 4674, "task_loss": 0.6560112833976746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6964298486709595, "epoch": 3.95, "learning_rate": 2.1772732761743753e-05, "loss": 0.6865, "step": 4675, "task_loss": 0.3552161157131195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5033692121505737, "epoch": 3.95, "learning_rate": 2.176669484361792e-05, "loss": 0.5288, "step": 4676, "task_loss": 0.8332726955413818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.74857097864151, "epoch": 3.95, "learning_rate": 2.176065692549209e-05, "loss": 0.4921, "step": 4677, "task_loss": 0.6777297854423523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5213508009910583, "epoch": 3.95, "learning_rate": 2.175461900736626e-05, "loss": 0.5211, "step": 4678, "task_loss": 1.1930798292160034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6845307946205139, "epoch": 3.95, "learning_rate": 2.174858108924043e-05, "loss": 0.4987, "step": 4679, "task_loss": 0.8212258219718933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.718172013759613, "epoch": 3.96, "learning_rate": 2.1742543171114602e-05, "loss": 0.6545, "step": 4680, "task_loss": 0.7680811882019043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.523149847984314, "epoch": 3.96, "learning_rate": 2.173650525298877e-05, "loss": 0.8008, "step": 4681, "task_loss": 0.34052520990371704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5815091729164124, "epoch": 3.96, "learning_rate": 2.173046733486294e-05, "loss": 0.5677, "step": 4682, "task_loss": 0.6031589508056641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4138781726360321, "epoch": 3.96, "learning_rate": 2.172442941673711e-05, "loss": 0.5525, "step": 4683, "task_loss": 0.6251529455184937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5561714768409729, "epoch": 3.96, "learning_rate": 2.1718391498611278e-05, "loss": 0.6177, "step": 4684, "task_loss": 1.3499115705490112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.247907817363739, "epoch": 3.96, "learning_rate": 2.1712353580485452e-05, "loss": 0.4063, "step": 4685, "task_loss": 0.5688174366950989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5097026824951172, "epoch": 3.96, "learning_rate": 2.170631566235962e-05, "loss": 0.574, "step": 4686, "task_loss": 0.8796315789222717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4184175729751587, "epoch": 3.96, "learning_rate": 2.1700277744233786e-05, "loss": 0.4424, "step": 4687, "task_loss": 1.1460038423538208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.591058611869812, "epoch": 3.96, "learning_rate": 2.169423982610796e-05, "loss": 0.592, "step": 4688, "task_loss": 0.3004400134086609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5734179019927979, "epoch": 3.96, "learning_rate": 2.1688201907982127e-05, "loss": 0.6525, "step": 4689, "task_loss": 0.8673804402351379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5321282148361206, "epoch": 3.96, "learning_rate": 2.1682163989856298e-05, "loss": 0.5689, "step": 4690, "task_loss": 0.2225162833929062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.639796257019043, "epoch": 3.96, "learning_rate": 2.167612607173047e-05, "loss": 0.5784, "step": 4691, "task_loss": 1.452883005142212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7429713010787964, "epoch": 3.97, "learning_rate": 2.1670088153604636e-05, "loss": 0.6937, "step": 4692, "task_loss": 0.8675745129585266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6331377029418945, "epoch": 3.97, "learning_rate": 2.166405023547881e-05, "loss": 0.6601, "step": 4693, "task_loss": 0.9951923489570618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35508954524993896, "epoch": 3.97, "learning_rate": 2.1658012317352977e-05, "loss": 0.5008, "step": 4694, "task_loss": 0.9431642889976501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.407417893409729, "epoch": 3.97, "learning_rate": 2.1651974399227148e-05, "loss": 0.4959, "step": 4695, "task_loss": 0.3801063001155853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6112456917762756, "epoch": 3.97, "learning_rate": 2.1645936481101318e-05, "loss": 0.5073, "step": 4696, "task_loss": 0.7563797235488892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.479740172624588, "epoch": 3.97, "learning_rate": 2.1639898562975485e-05, "loss": 0.5015, "step": 4697, "task_loss": 0.4670616388320923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5717419385910034, "epoch": 3.97, "learning_rate": 2.1633860644849656e-05, "loss": 0.4939, "step": 4698, "task_loss": 1.5216275453567505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5171864032745361, "epoch": 3.97, "learning_rate": 2.1627822726723826e-05, "loss": 0.5194, "step": 4699, "task_loss": 0.7689891457557678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30437183380126953, "epoch": 3.97, "learning_rate": 2.1621784808597997e-05, "loss": 0.4351, "step": 4700, "task_loss": 0.49128419160842896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.593900740146637, "epoch": 3.97, "learning_rate": 2.1615746890472168e-05, "loss": 0.5984, "step": 4701, "task_loss": 0.7727730870246887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49488240480422974, "epoch": 3.97, "learning_rate": 2.1609708972346335e-05, "loss": 0.6023, "step": 4702, "task_loss": 0.47161537408828735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7399104833602905, "epoch": 3.97, "learning_rate": 2.1603671054220505e-05, "loss": 0.607, "step": 4703, "task_loss": 0.535976231098175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4552808403968811, "epoch": 3.98, "learning_rate": 2.1597633136094676e-05, "loss": 0.717, "step": 4704, "task_loss": 0.17608077824115753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6849422454833984, "epoch": 3.98, "learning_rate": 2.1591595217968846e-05, "loss": 0.6083, "step": 4705, "task_loss": 0.7298884391784668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6557953357696533, "epoch": 3.98, "learning_rate": 2.1585557299843014e-05, "loss": 0.6359, "step": 4706, "task_loss": 0.9728128910064697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4499448835849762, "epoch": 3.98, "learning_rate": 2.1579519381717184e-05, "loss": 0.4738, "step": 4707, "task_loss": 0.42878738045692444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5996905565261841, "epoch": 3.98, "learning_rate": 2.1573481463591355e-05, "loss": 0.5986, "step": 4708, "task_loss": 0.5559734106063843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5977228879928589, "epoch": 3.98, "learning_rate": 2.1567443545465525e-05, "loss": 0.626, "step": 4709, "task_loss": 0.6031299829483032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3838256001472473, "epoch": 3.98, "learning_rate": 2.1561405627339696e-05, "loss": 0.6501, "step": 4710, "task_loss": 0.671137273311615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36928120255470276, "epoch": 3.98, "learning_rate": 2.1555367709213863e-05, "loss": 0.4995, "step": 4711, "task_loss": 0.41919150948524475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7278931140899658, "epoch": 3.98, "learning_rate": 2.1549329791088034e-05, "loss": 0.7055, "step": 4712, "task_loss": 0.9952042698860168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3223481774330139, "epoch": 3.98, "learning_rate": 2.1543291872962204e-05, "loss": 0.4211, "step": 4713, "task_loss": 0.6666816473007202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5668814182281494, "epoch": 3.98, "learning_rate": 2.153725395483637e-05, "loss": 0.8542, "step": 4714, "task_loss": 0.8340035676956177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5388627052307129, "epoch": 3.99, "learning_rate": 2.1531216036710545e-05, "loss": 0.5364, "step": 4715, "task_loss": 0.45354506373405457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2979797124862671, "epoch": 3.99, "learning_rate": 2.1525178118584713e-05, "loss": 0.6399, "step": 4716, "task_loss": 0.16478177905082703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9460223317146301, "epoch": 3.99, "learning_rate": 2.1519140200458883e-05, "loss": 0.8317, "step": 4717, "task_loss": 0.5992939472198486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4957447946071625, "epoch": 3.99, "learning_rate": 2.1513102282333054e-05, "loss": 0.5025, "step": 4718, "task_loss": 0.419078528881073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3905831575393677, "epoch": 3.99, "learning_rate": 2.150706436420722e-05, "loss": 0.6132, "step": 4719, "task_loss": 0.4484780728816986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.519612729549408, "epoch": 3.99, "learning_rate": 2.1501026446081395e-05, "loss": 0.5302, "step": 4720, "task_loss": 0.7195404767990112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40026116371154785, "epoch": 3.99, "learning_rate": 2.1494988527955562e-05, "loss": 0.4523, "step": 4721, "task_loss": 0.5414677858352661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0485765933990479, "epoch": 3.99, "learning_rate": 2.148895060982973e-05, "loss": 0.69, "step": 4722, "task_loss": 1.3995647430419922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42104777693748474, "epoch": 3.99, "learning_rate": 2.1482912691703903e-05, "loss": 0.5349, "step": 4723, "task_loss": 0.7281877994537354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40247026085853577, "epoch": 3.99, "learning_rate": 2.147687477357807e-05, "loss": 0.5937, "step": 4724, "task_loss": 0.48326680064201355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6824438571929932, "epoch": 3.99, "learning_rate": 2.147083685545224e-05, "loss": 0.6484, "step": 4725, "task_loss": 0.3684065043926239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6422073841094971, "epoch": 3.99, "learning_rate": 2.146479893732641e-05, "loss": 0.5023, "step": 4726, "task_loss": 0.33097705245018005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.804389476776123, "epoch": 4.0, "learning_rate": 2.145876101920058e-05, "loss": 0.6558, "step": 4727, "task_loss": 1.2167619466781616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6194782257080078, "epoch": 4.0, "learning_rate": 2.145272310107475e-05, "loss": 0.5813, "step": 4728, "task_loss": 1.4933849573135376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1649617850780487, "epoch": 4.0, "learning_rate": 2.144668518294892e-05, "loss": 0.454, "step": 4729, "task_loss": 0.09604117274284363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7983605861663818, "epoch": 4.0, "learning_rate": 2.144064726482309e-05, "loss": 0.8197, "step": 4730, "task_loss": 1.3449786901474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7365612983703613, "epoch": 4.0, "learning_rate": 2.143460934669726e-05, "loss": 0.6017, "step": 4731, "task_loss": 0.7054152488708496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5873042941093445, "epoch": 4.0, "learning_rate": 2.1428571428571428e-05, "loss": 0.5771, "step": 4732, "task_loss": 0.5025756359100342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31387650966644287, "epoch": 4.0, "learning_rate": 2.14225335104456e-05, "loss": 0.6364, "step": 4733, "task_loss": 0.7157849073410034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5177661180496216, "epoch": 4.0, "learning_rate": 2.141649559231977e-05, "loss": 0.5709, "step": 4734, "task_loss": 1.0256260633468628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3835583031177521, "epoch": 4.0, "learning_rate": 2.141045767419394e-05, "loss": 0.5284, "step": 4735, "task_loss": 1.4411654472351074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5911240577697754, "epoch": 4.0, "learning_rate": 2.1404419756068107e-05, "loss": 0.5764, "step": 4736, "task_loss": 1.5805437564849854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49940869212150574, "epoch": 4.0, "learning_rate": 2.1398381837942278e-05, "loss": 0.5287, "step": 4737, "task_loss": 0.33925512433052063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5766795873641968, "epoch": 4.01, "learning_rate": 2.1392343919816448e-05, "loss": 0.5163, "step": 4738, "task_loss": 0.5510725975036621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6070358157157898, "epoch": 4.01, "learning_rate": 2.138630600169062e-05, "loss": 0.6029, "step": 4739, "task_loss": 0.365557461977005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4499216079711914, "epoch": 4.01, "learning_rate": 2.138026808356479e-05, "loss": 0.4547, "step": 4740, "task_loss": 0.28514420986175537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3616742193698883, "epoch": 4.01, "learning_rate": 2.1374230165438957e-05, "loss": 0.5692, "step": 4741, "task_loss": 0.3485644459724426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5094785690307617, "epoch": 4.01, "learning_rate": 2.1368192247313127e-05, "loss": 0.5021, "step": 4742, "task_loss": 0.1370406448841095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40034934878349304, "epoch": 4.01, "learning_rate": 2.1362154329187298e-05, "loss": 0.525, "step": 4743, "task_loss": 0.055064428597688675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38077637553215027, "epoch": 4.01, "learning_rate": 2.1356116411061465e-05, "loss": 0.7244, "step": 4744, "task_loss": 0.43246057629585266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38661447167396545, "epoch": 4.01, "learning_rate": 2.135007849293564e-05, "loss": 0.4942, "step": 4745, "task_loss": 0.49314892292022705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.415460467338562, "epoch": 4.01, "learning_rate": 2.1344040574809806e-05, "loss": 0.4116, "step": 4746, "task_loss": 0.2467094361782074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.771670401096344, "epoch": 4.01, "learning_rate": 2.1338002656683977e-05, "loss": 0.7333, "step": 4747, "task_loss": 0.1999107003211975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5362812280654907, "epoch": 4.01, "learning_rate": 2.1331964738558147e-05, "loss": 0.7488, "step": 4748, "task_loss": 0.747908353805542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1375824213027954, "epoch": 4.01, "learning_rate": 2.1325926820432314e-05, "loss": 0.6935, "step": 4749, "task_loss": 0.7045477032661438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5673202276229858, "epoch": 4.02, "learning_rate": 2.1319888902306488e-05, "loss": 0.5424, "step": 4750, "task_loss": 0.77134770154953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6390001177787781, "epoch": 4.02, "learning_rate": 2.1313850984180655e-05, "loss": 0.5799, "step": 4751, "task_loss": 0.3354516625404358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4571671485900879, "epoch": 4.02, "learning_rate": 2.1307813066054823e-05, "loss": 0.6341, "step": 4752, "task_loss": 0.7684768438339233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4873442053794861, "epoch": 4.02, "learning_rate": 2.1301775147928997e-05, "loss": 0.6232, "step": 4753, "task_loss": 0.7417385578155518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5556789636611938, "epoch": 4.02, "learning_rate": 2.1295737229803164e-05, "loss": 0.8175, "step": 4754, "task_loss": 0.06997604668140411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6510725021362305, "epoch": 4.02, "learning_rate": 2.1289699311677334e-05, "loss": 0.6165, "step": 4755, "task_loss": 0.664560854434967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6853417158126831, "epoch": 4.02, "learning_rate": 2.1283661393551505e-05, "loss": 0.7159, "step": 4756, "task_loss": 0.7184975147247314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7224293351173401, "epoch": 4.02, "learning_rate": 2.1277623475425672e-05, "loss": 0.5319, "step": 4757, "task_loss": 1.2513842582702637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.692254900932312, "epoch": 4.02, "learning_rate": 2.1271585557299846e-05, "loss": 0.634, "step": 4758, "task_loss": 0.5582863688468933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47624650597572327, "epoch": 4.02, "learning_rate": 2.1265547639174013e-05, "loss": 0.4892, "step": 4759, "task_loss": 0.6942484378814697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36005699634552, "epoch": 4.02, "learning_rate": 2.1259509721048184e-05, "loss": 0.4479, "step": 4760, "task_loss": 0.3934752643108368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0560712814331055, "epoch": 4.02, "learning_rate": 2.1253471802922354e-05, "loss": 0.5956, "step": 4761, "task_loss": 1.5140043497085571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4561522603034973, "epoch": 4.03, "learning_rate": 2.124743388479652e-05, "loss": 0.5798, "step": 4762, "task_loss": 0.7723429203033447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5672728419303894, "epoch": 4.03, "learning_rate": 2.1241395966670692e-05, "loss": 0.5252, "step": 4763, "task_loss": 0.5084905028343201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5851724147796631, "epoch": 4.03, "learning_rate": 2.1235358048544863e-05, "loss": 0.5389, "step": 4764, "task_loss": 0.5211634039878845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7486690282821655, "epoch": 4.03, "learning_rate": 2.1229320130419033e-05, "loss": 0.5966, "step": 4765, "task_loss": 0.6716009378433228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2804730534553528, "epoch": 4.03, "learning_rate": 2.1223282212293204e-05, "loss": 0.4164, "step": 4766, "task_loss": 0.819311261177063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5420080423355103, "epoch": 4.03, "learning_rate": 2.121724429416737e-05, "loss": 0.5266, "step": 4767, "task_loss": 0.06789326667785645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3944246768951416, "epoch": 4.03, "learning_rate": 2.121120637604154e-05, "loss": 0.353, "step": 4768, "task_loss": 0.5486432909965515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35936596989631653, "epoch": 4.03, "learning_rate": 2.1205168457915712e-05, "loss": 0.3474, "step": 4769, "task_loss": 0.2861657440662384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6819813251495361, "epoch": 4.03, "learning_rate": 2.1199130539789883e-05, "loss": 0.725, "step": 4770, "task_loss": 0.4608876407146454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23729769885540009, "epoch": 4.03, "learning_rate": 2.119309262166405e-05, "loss": 0.4699, "step": 4771, "task_loss": 0.1860072761774063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2965691089630127, "epoch": 4.03, "learning_rate": 2.118705470353822e-05, "loss": 0.4626, "step": 4772, "task_loss": 0.4933810234069824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1424657106399536, "epoch": 4.03, "learning_rate": 2.118101678541239e-05, "loss": 0.6838, "step": 4773, "task_loss": 1.276176929473877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4051796793937683, "epoch": 4.04, "learning_rate": 2.117497886728656e-05, "loss": 0.712, "step": 4774, "task_loss": 0.2759948670864105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5624643564224243, "epoch": 4.04, "learning_rate": 2.1168940949160732e-05, "loss": 0.5137, "step": 4775, "task_loss": 0.8818033933639526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37638020515441895, "epoch": 4.04, "learning_rate": 2.11629030310349e-05, "loss": 0.5196, "step": 4776, "task_loss": 0.5820809006690979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3399655818939209, "epoch": 4.04, "learning_rate": 2.115686511290907e-05, "loss": 0.5017, "step": 4777, "task_loss": 1.0989539623260498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4700782597064972, "epoch": 4.04, "learning_rate": 2.115082719478324e-05, "loss": 0.8396, "step": 4778, "task_loss": 0.819175660610199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39835840463638306, "epoch": 4.04, "learning_rate": 2.1144789276657408e-05, "loss": 0.4798, "step": 4779, "task_loss": 1.0504848957061768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2795675992965698, "epoch": 4.04, "learning_rate": 2.113875135853158e-05, "loss": 0.6328, "step": 4780, "task_loss": 0.7517013549804688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5812097787857056, "epoch": 4.04, "learning_rate": 2.113271344040575e-05, "loss": 0.552, "step": 4781, "task_loss": 0.739239513874054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5383638143539429, "epoch": 4.04, "learning_rate": 2.112667552227992e-05, "loss": 0.7043, "step": 4782, "task_loss": 0.40152570605278015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5513266921043396, "epoch": 4.04, "learning_rate": 2.112063760415409e-05, "loss": 0.4508, "step": 4783, "task_loss": 0.6069225668907166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2666395306587219, "epoch": 4.04, "learning_rate": 2.1114599686028257e-05, "loss": 0.4327, "step": 4784, "task_loss": 0.3989448845386505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4584823548793793, "epoch": 4.04, "learning_rate": 2.110856176790243e-05, "loss": 0.4622, "step": 4785, "task_loss": 0.3689836859703064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6570338606834412, "epoch": 4.05, "learning_rate": 2.11025238497766e-05, "loss": 0.5397, "step": 4786, "task_loss": 0.9169428944587708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4032033383846283, "epoch": 4.05, "learning_rate": 2.1096485931650766e-05, "loss": 0.6497, "step": 4787, "task_loss": 0.8290825486183167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6730924844741821, "epoch": 4.05, "learning_rate": 2.109044801352494e-05, "loss": 0.5493, "step": 4788, "task_loss": 0.7846968173980713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4957803785800934, "epoch": 4.05, "learning_rate": 2.1084410095399107e-05, "loss": 0.5058, "step": 4789, "task_loss": 0.3049405515193939 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6287276148796082, "epoch": 4.05, "learning_rate": 2.1078372177273277e-05, "loss": 0.7166, "step": 4790, "task_loss": 0.5100951790809631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2914942800998688, "epoch": 4.05, "learning_rate": 2.1072334259147448e-05, "loss": 0.4147, "step": 4791, "task_loss": 0.25010931491851807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32795974612236023, "epoch": 4.05, "learning_rate": 2.1066296341021615e-05, "loss": 0.476, "step": 4792, "task_loss": 0.6992529630661011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3864227533340454, "epoch": 4.05, "learning_rate": 2.1060258422895786e-05, "loss": 0.4805, "step": 4793, "task_loss": 0.4145110547542572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3530798554420471, "epoch": 4.05, "learning_rate": 2.1054220504769956e-05, "loss": 0.3907, "step": 4794, "task_loss": 0.06814588606357574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28139442205429077, "epoch": 4.05, "learning_rate": 2.1048182586644127e-05, "loss": 0.49, "step": 4795, "task_loss": 0.0899096354842186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6951201558113098, "epoch": 4.05, "learning_rate": 2.1042144668518297e-05, "loss": 0.5916, "step": 4796, "task_loss": 1.1115447282791138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5278486013412476, "epoch": 4.05, "learning_rate": 2.1036106750392464e-05, "loss": 0.5752, "step": 4797, "task_loss": 1.0248665809631348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40249085426330566, "epoch": 4.06, "learning_rate": 2.1030068832266635e-05, "loss": 0.5342, "step": 4798, "task_loss": 0.4114725589752197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6014667749404907, "epoch": 4.06, "learning_rate": 2.1024030914140806e-05, "loss": 0.4923, "step": 4799, "task_loss": 0.8599461317062378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5211051106452942, "epoch": 4.06, "learning_rate": 2.1017992996014976e-05, "loss": 0.5223, "step": 4800, "task_loss": 0.17390498518943787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5792571306228638, "epoch": 4.06, "learning_rate": 2.1011955077889143e-05, "loss": 0.5304, "step": 4801, "task_loss": 0.9854143857955933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32039448618888855, "epoch": 4.06, "learning_rate": 2.1005917159763314e-05, "loss": 0.6038, "step": 4802, "task_loss": 0.13671576976776123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37806111574172974, "epoch": 4.06, "learning_rate": 2.0999879241637484e-05, "loss": 0.5315, "step": 4803, "task_loss": 0.45222413539886475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39244237542152405, "epoch": 4.06, "learning_rate": 2.0993841323511655e-05, "loss": 0.5848, "step": 4804, "task_loss": 0.9736164808273315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.19975732266902924, "epoch": 4.06, "learning_rate": 2.0987803405385826e-05, "loss": 0.4394, "step": 4805, "task_loss": 0.40974661707878113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31402015686035156, "epoch": 4.06, "learning_rate": 2.0981765487259993e-05, "loss": 0.5291, "step": 4806, "task_loss": 0.5116965174674988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6494240164756775, "epoch": 4.06, "learning_rate": 2.0975727569134163e-05, "loss": 0.657, "step": 4807, "task_loss": 0.10465505719184875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5152121782302856, "epoch": 4.06, "learning_rate": 2.0969689651008334e-05, "loss": 0.6183, "step": 4808, "task_loss": 0.791029155254364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.778637707233429, "epoch": 4.07, "learning_rate": 2.09636517328825e-05, "loss": 0.5603, "step": 4809, "task_loss": 1.1276065111160278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4197838306427002, "epoch": 4.07, "learning_rate": 2.0957613814756675e-05, "loss": 0.5307, "step": 4810, "task_loss": 0.2402072250843048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2966751158237457, "epoch": 4.07, "learning_rate": 2.0951575896630842e-05, "loss": 0.4933, "step": 4811, "task_loss": 0.676371157169342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35055047273635864, "epoch": 4.07, "learning_rate": 2.0945537978505013e-05, "loss": 0.4212, "step": 4812, "task_loss": 1.3238452672958374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5209638476371765, "epoch": 4.07, "learning_rate": 2.0939500060379183e-05, "loss": 0.5894, "step": 4813, "task_loss": 0.8793561458587646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4854776859283447, "epoch": 4.07, "learning_rate": 2.093346214225335e-05, "loss": 0.5731, "step": 4814, "task_loss": 1.1083720922470093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44690054655075073, "epoch": 4.07, "learning_rate": 2.0927424224127525e-05, "loss": 0.5348, "step": 4815, "task_loss": 0.1143370270729065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3003236651420593, "epoch": 4.07, "learning_rate": 2.0921386306001692e-05, "loss": 0.3989, "step": 4816, "task_loss": 0.2688910961151123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5574758052825928, "epoch": 4.07, "learning_rate": 2.091534838787586e-05, "loss": 0.6373, "step": 4817, "task_loss": 0.9178678393363953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4432816207408905, "epoch": 4.07, "learning_rate": 2.0909310469750033e-05, "loss": 0.4265, "step": 4818, "task_loss": 0.5627980828285217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5458700656890869, "epoch": 4.07, "learning_rate": 2.09032725516242e-05, "loss": 0.4196, "step": 4819, "task_loss": 0.45111384987831116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4634338617324829, "epoch": 4.07, "learning_rate": 2.089723463349837e-05, "loss": 0.4065, "step": 4820, "task_loss": 0.3636632263660431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45153355598449707, "epoch": 4.08, "learning_rate": 2.089119671537254e-05, "loss": 0.4754, "step": 4821, "task_loss": 0.6986990571022034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3073423206806183, "epoch": 4.08, "learning_rate": 2.088515879724671e-05, "loss": 0.6625, "step": 4822, "task_loss": 0.522067129611969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48834651708602905, "epoch": 4.08, "learning_rate": 2.0879120879120882e-05, "loss": 0.5237, "step": 4823, "task_loss": 0.5856226086616516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3686964511871338, "epoch": 4.08, "learning_rate": 2.087308296099505e-05, "loss": 0.5105, "step": 4824, "task_loss": 0.3028053045272827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46363118290901184, "epoch": 4.08, "learning_rate": 2.086704504286922e-05, "loss": 0.5166, "step": 4825, "task_loss": 0.8139827251434326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6916056275367737, "epoch": 4.08, "learning_rate": 2.086100712474339e-05, "loss": 0.4746, "step": 4826, "task_loss": 0.5807672142982483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3776167631149292, "epoch": 4.08, "learning_rate": 2.0854969206617558e-05, "loss": 0.4624, "step": 4827, "task_loss": 0.39206063747406006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4769865870475769, "epoch": 4.08, "learning_rate": 2.084893128849173e-05, "loss": 0.6137, "step": 4828, "task_loss": 0.5209089517593384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36051177978515625, "epoch": 4.08, "learning_rate": 2.08428933703659e-05, "loss": 0.451, "step": 4829, "task_loss": 0.4639691710472107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5690087676048279, "epoch": 4.08, "learning_rate": 2.083685545224007e-05, "loss": 0.5595, "step": 4830, "task_loss": 0.8772652745246887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33982405066490173, "epoch": 4.08, "learning_rate": 2.083081753411424e-05, "loss": 0.577, "step": 4831, "task_loss": 0.7649607062339783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5056725144386292, "epoch": 4.08, "learning_rate": 2.0824779615988407e-05, "loss": 0.5053, "step": 4832, "task_loss": 0.9000107645988464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4725050926208496, "epoch": 4.09, "learning_rate": 2.0818741697862578e-05, "loss": 0.662, "step": 4833, "task_loss": 1.650697946548462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43998655676841736, "epoch": 4.09, "learning_rate": 2.081270377973675e-05, "loss": 0.5597, "step": 4834, "task_loss": 0.6790078282356262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34183740615844727, "epoch": 4.09, "learning_rate": 2.080666586161092e-05, "loss": 0.3536, "step": 4835, "task_loss": 0.6885197162628174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8613196611404419, "epoch": 4.09, "learning_rate": 2.0800627943485086e-05, "loss": 0.6661, "step": 4836, "task_loss": 1.4692519903182983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42925214767456055, "epoch": 4.09, "learning_rate": 2.0794590025359257e-05, "loss": 0.561, "step": 4837, "task_loss": 0.8453882932662964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5459936857223511, "epoch": 4.09, "learning_rate": 2.0788552107233427e-05, "loss": 0.7471, "step": 4838, "task_loss": 1.9009873867034912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7726955413818359, "epoch": 4.09, "learning_rate": 2.0782514189107598e-05, "loss": 0.6079, "step": 4839, "task_loss": 0.6810758709907532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6751399040222168, "epoch": 4.09, "learning_rate": 2.077647627098177e-05, "loss": 0.7232, "step": 4840, "task_loss": 0.5369904637336731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43171146512031555, "epoch": 4.09, "learning_rate": 2.0770438352855936e-05, "loss": 0.5431, "step": 4841, "task_loss": 0.851557195186615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6060367822647095, "epoch": 4.09, "learning_rate": 2.0764400434730106e-05, "loss": 0.6636, "step": 4842, "task_loss": 0.4953901469707489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31919586658477783, "epoch": 4.09, "learning_rate": 2.0758362516604277e-05, "loss": 0.4883, "step": 4843, "task_loss": 0.8588495850563049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6748343110084534, "epoch": 4.09, "learning_rate": 2.0752324598478444e-05, "loss": 0.6488, "step": 4844, "task_loss": 0.9404436945915222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5297982096672058, "epoch": 4.1, "learning_rate": 2.0746286680352618e-05, "loss": 0.476, "step": 4845, "task_loss": 0.9502042531967163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4688900411128998, "epoch": 4.1, "learning_rate": 2.0740248762226785e-05, "loss": 0.6395, "step": 4846, "task_loss": 0.7039148807525635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6190444231033325, "epoch": 4.1, "learning_rate": 2.0734210844100956e-05, "loss": 0.4493, "step": 4847, "task_loss": 0.43178680539131165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.537518322467804, "epoch": 4.1, "learning_rate": 2.0728172925975126e-05, "loss": 0.4935, "step": 4848, "task_loss": 0.8373633623123169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6106147170066833, "epoch": 4.1, "learning_rate": 2.0722135007849293e-05, "loss": 0.5937, "step": 4849, "task_loss": 0.6050535440444946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5739820599555969, "epoch": 4.1, "learning_rate": 2.0716097089723464e-05, "loss": 0.5613, "step": 4850, "task_loss": 0.784114420413971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2724568545818329, "epoch": 4.1, "learning_rate": 2.0710059171597635e-05, "loss": 0.3785, "step": 4851, "task_loss": 0.28200775384902954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3734396994113922, "epoch": 4.1, "learning_rate": 2.0704021253471802e-05, "loss": 0.5408, "step": 4852, "task_loss": 0.48892220854759216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3134028911590576, "epoch": 4.1, "learning_rate": 2.0697983335345976e-05, "loss": 0.4789, "step": 4853, "task_loss": 0.2794402837753296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6287461519241333, "epoch": 4.1, "learning_rate": 2.0691945417220143e-05, "loss": 0.5876, "step": 4854, "task_loss": 0.2488531470298767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44084808230400085, "epoch": 4.1, "learning_rate": 2.0685907499094314e-05, "loss": 0.4973, "step": 4855, "task_loss": 0.30437490344047546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4538155198097229, "epoch": 4.1, "learning_rate": 2.0679869580968484e-05, "loss": 0.4641, "step": 4856, "task_loss": 0.35117271542549133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5552226901054382, "epoch": 4.11, "learning_rate": 2.067383166284265e-05, "loss": 0.5211, "step": 4857, "task_loss": 0.6853883266448975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9495399594306946, "epoch": 4.11, "learning_rate": 2.0667793744716822e-05, "loss": 0.6362, "step": 4858, "task_loss": 1.8198763132095337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4911735951900482, "epoch": 4.11, "learning_rate": 2.0661755826590992e-05, "loss": 0.5015, "step": 4859, "task_loss": 0.7053030729293823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7839615941047668, "epoch": 4.11, "learning_rate": 2.065571790846516e-05, "loss": 0.7798, "step": 4860, "task_loss": 0.3576664626598358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7067352533340454, "epoch": 4.11, "learning_rate": 2.0649679990339334e-05, "loss": 0.61, "step": 4861, "task_loss": 1.1211578845977783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47274214029312134, "epoch": 4.11, "learning_rate": 2.06436420722135e-05, "loss": 0.5948, "step": 4862, "task_loss": 1.039860486984253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5427668690681458, "epoch": 4.11, "learning_rate": 2.063760415408767e-05, "loss": 0.5503, "step": 4863, "task_loss": 1.6784418821334839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36723172664642334, "epoch": 4.11, "learning_rate": 2.0631566235961842e-05, "loss": 0.4019, "step": 4864, "task_loss": 0.6649518013000488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2425124645233154, "epoch": 4.11, "learning_rate": 2.062552831783601e-05, "loss": 0.7267, "step": 4865, "task_loss": 0.867608368396759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36438655853271484, "epoch": 4.11, "learning_rate": 2.061949039971018e-05, "loss": 0.5222, "step": 4866, "task_loss": 0.6334699392318726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.812321126461029, "epoch": 4.11, "learning_rate": 2.061345248158435e-05, "loss": 0.5441, "step": 4867, "task_loss": 1.1545593738555908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.555686891078949, "epoch": 4.11, "learning_rate": 2.060741456345852e-05, "loss": 0.6475, "step": 4868, "task_loss": 1.6450722217559814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24309560656547546, "epoch": 4.12, "learning_rate": 2.060137664533269e-05, "loss": 0.4101, "step": 4869, "task_loss": 0.08592002093791962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7391124367713928, "epoch": 4.12, "learning_rate": 2.059533872720686e-05, "loss": 0.7967, "step": 4870, "task_loss": 0.9574471116065979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6637915372848511, "epoch": 4.12, "learning_rate": 2.058930080908103e-05, "loss": 0.6258, "step": 4871, "task_loss": 0.572775661945343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.414417028427124, "epoch": 4.12, "learning_rate": 2.05832628909552e-05, "loss": 0.5254, "step": 4872, "task_loss": 0.8022780418395996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5420283079147339, "epoch": 4.12, "learning_rate": 2.057722497282937e-05, "loss": 0.4529, "step": 4873, "task_loss": 0.3423026204109192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5352892279624939, "epoch": 4.12, "learning_rate": 2.0571187054703537e-05, "loss": 0.4552, "step": 4874, "task_loss": 0.8503915667533875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40901365876197815, "epoch": 4.12, "learning_rate": 2.0565149136577708e-05, "loss": 0.4971, "step": 4875, "task_loss": 0.33442750573158264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.81807941198349, "epoch": 4.12, "learning_rate": 2.055911121845188e-05, "loss": 0.5126, "step": 4876, "task_loss": 0.9090336561203003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5702627897262573, "epoch": 4.12, "learning_rate": 2.055307330032605e-05, "loss": 0.4937, "step": 4877, "task_loss": 0.8171088695526123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5972639322280884, "epoch": 4.12, "learning_rate": 2.054703538220022e-05, "loss": 0.5401, "step": 4878, "task_loss": 0.491543173789978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21900400519371033, "epoch": 4.12, "learning_rate": 2.0540997464074387e-05, "loss": 0.5876, "step": 4879, "task_loss": 0.3664439022541046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5002795457839966, "epoch": 4.13, "learning_rate": 2.0534959545948557e-05, "loss": 0.5045, "step": 4880, "task_loss": 0.2700909972190857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3265456557273865, "epoch": 4.13, "learning_rate": 2.0528921627822728e-05, "loss": 0.437, "step": 4881, "task_loss": 0.13341550529003143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5095371603965759, "epoch": 4.13, "learning_rate": 2.0522883709696895e-05, "loss": 0.613, "step": 4882, "task_loss": 1.263572335243225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4166337847709656, "epoch": 4.13, "learning_rate": 2.051684579157107e-05, "loss": 0.4706, "step": 4883, "task_loss": 1.2293126583099365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7518548965454102, "epoch": 4.13, "learning_rate": 2.0510807873445236e-05, "loss": 0.6321, "step": 4884, "task_loss": 0.7778270244598389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5106979608535767, "epoch": 4.13, "learning_rate": 2.0504769955319407e-05, "loss": 0.5266, "step": 4885, "task_loss": 0.1402025818824768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48517274856567383, "epoch": 4.13, "learning_rate": 2.0498732037193578e-05, "loss": 0.5374, "step": 4886, "task_loss": 0.31376367807388306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39052340388298035, "epoch": 4.13, "learning_rate": 2.0492694119067745e-05, "loss": 0.4256, "step": 4887, "task_loss": 0.21021248400211334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5862571597099304, "epoch": 4.13, "learning_rate": 2.048665620094192e-05, "loss": 0.6384, "step": 4888, "task_loss": 0.933180034160614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3617023825645447, "epoch": 4.13, "learning_rate": 2.0480618282816086e-05, "loss": 0.4606, "step": 4889, "task_loss": 0.08401191234588623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5172408223152161, "epoch": 4.13, "learning_rate": 2.0474580364690253e-05, "loss": 0.6933, "step": 4890, "task_loss": 1.0569700002670288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38278430700302124, "epoch": 4.13, "learning_rate": 2.0468542446564427e-05, "loss": 0.5293, "step": 4891, "task_loss": 0.6304617524147034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5206208825111389, "epoch": 4.14, "learning_rate": 2.0462504528438594e-05, "loss": 0.6246, "step": 4892, "task_loss": 0.6471975445747375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7046142816543579, "epoch": 4.14, "learning_rate": 2.0456466610312765e-05, "loss": 0.6234, "step": 4893, "task_loss": 1.216585397720337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6257479190826416, "epoch": 4.14, "learning_rate": 2.0450428692186935e-05, "loss": 0.5103, "step": 4894, "task_loss": 0.7435041666030884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4277074337005615, "epoch": 4.14, "learning_rate": 2.0444390774061102e-05, "loss": 0.4981, "step": 4895, "task_loss": 0.955923855304718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7934571504592896, "epoch": 4.14, "learning_rate": 2.0438352855935276e-05, "loss": 0.4517, "step": 4896, "task_loss": 0.8734060525894165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4151521325111389, "epoch": 4.14, "learning_rate": 2.0432314937809444e-05, "loss": 0.7062, "step": 4897, "task_loss": 1.0998998880386353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5007086992263794, "epoch": 4.14, "learning_rate": 2.0426277019683614e-05, "loss": 0.7064, "step": 4898, "task_loss": 0.955003559589386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3017944097518921, "epoch": 4.14, "learning_rate": 2.0420239101557785e-05, "loss": 0.4384, "step": 4899, "task_loss": 0.18417111039161682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41983672976493835, "epoch": 4.14, "learning_rate": 2.0414201183431952e-05, "loss": 0.5696, "step": 4900, "task_loss": 0.5690034031867981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7621749043464661, "epoch": 4.14, "learning_rate": 2.0408163265306123e-05, "loss": 0.5547, "step": 4901, "task_loss": 1.0805693864822388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27504539489746094, "epoch": 4.14, "learning_rate": 2.0402125347180293e-05, "loss": 0.4971, "step": 4902, "task_loss": 0.8569192290306091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31197261810302734, "epoch": 4.14, "learning_rate": 2.0396087429054464e-05, "loss": 0.4704, "step": 4903, "task_loss": 0.4006117880344391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2562114894390106, "epoch": 4.15, "learning_rate": 2.0390049510928634e-05, "loss": 0.3859, "step": 4904, "task_loss": 0.11043187230825424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7524648308753967, "epoch": 4.15, "learning_rate": 2.03840115928028e-05, "loss": 0.5424, "step": 4905, "task_loss": 0.6407849192619324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4935816526412964, "epoch": 4.15, "learning_rate": 2.0377973674676972e-05, "loss": 0.6947, "step": 4906, "task_loss": 0.7342183589935303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4817306399345398, "epoch": 4.15, "learning_rate": 2.0371935756551143e-05, "loss": 0.4713, "step": 4907, "task_loss": 0.3491319715976715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5387006402015686, "epoch": 4.15, "learning_rate": 2.0365897838425313e-05, "loss": 0.6569, "step": 4908, "task_loss": 0.4180913269519806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5934556126594543, "epoch": 4.15, "learning_rate": 2.035985992029948e-05, "loss": 0.6321, "step": 4909, "task_loss": 0.6348301768302917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40936630964279175, "epoch": 4.15, "learning_rate": 2.035382200217365e-05, "loss": 0.5107, "step": 4910, "task_loss": 0.5561091899871826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6141155958175659, "epoch": 4.15, "learning_rate": 2.034778408404782e-05, "loss": 0.5211, "step": 4911, "task_loss": 1.0218429565429688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4583278298377991, "epoch": 4.15, "learning_rate": 2.0341746165921992e-05, "loss": 0.5145, "step": 4912, "task_loss": 0.7295549511909485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6116304397583008, "epoch": 4.15, "learning_rate": 2.0335708247796163e-05, "loss": 0.5252, "step": 4913, "task_loss": 0.6079584956169128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7159029245376587, "epoch": 4.15, "learning_rate": 2.032967032967033e-05, "loss": 0.5575, "step": 4914, "task_loss": 1.1247341632843018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31829237937927246, "epoch": 4.15, "learning_rate": 2.03236324115445e-05, "loss": 0.419, "step": 4915, "task_loss": 0.061529599130153656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4920375943183899, "epoch": 4.16, "learning_rate": 2.031759449341867e-05, "loss": 0.6514, "step": 4916, "task_loss": 0.36307623982429504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4403213858604431, "epoch": 4.16, "learning_rate": 2.0311556575292838e-05, "loss": 0.5035, "step": 4917, "task_loss": 0.809432327747345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5061545372009277, "epoch": 4.16, "learning_rate": 2.0305518657167012e-05, "loss": 0.5346, "step": 4918, "task_loss": 1.1466282606124878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37074413895606995, "epoch": 4.16, "learning_rate": 2.029948073904118e-05, "loss": 0.522, "step": 4919, "task_loss": 0.4455549418926239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2901389002799988, "epoch": 4.16, "learning_rate": 2.029344282091535e-05, "loss": 0.4272, "step": 4920, "task_loss": 0.6190131306648254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5332611799240112, "epoch": 4.16, "learning_rate": 2.028740490278952e-05, "loss": 0.4635, "step": 4921, "task_loss": 0.5690762996673584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48544007539749146, "epoch": 4.16, "learning_rate": 2.0281366984663688e-05, "loss": 0.5802, "step": 4922, "task_loss": 0.582472026348114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5117124319076538, "epoch": 4.16, "learning_rate": 2.0275329066537858e-05, "loss": 0.5468, "step": 4923, "task_loss": 0.5030387043952942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5606534481048584, "epoch": 4.16, "learning_rate": 2.026929114841203e-05, "loss": 0.4677, "step": 4924, "task_loss": 0.7098164558410645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25867798924446106, "epoch": 4.16, "learning_rate": 2.0263253230286196e-05, "loss": 0.3534, "step": 4925, "task_loss": 0.11649671941995621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3853670358657837, "epoch": 4.16, "learning_rate": 2.025721531216037e-05, "loss": 0.3969, "step": 4926, "task_loss": 0.48405909538269043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34250783920288086, "epoch": 4.16, "learning_rate": 2.0251177394034537e-05, "loss": 0.5536, "step": 4927, "task_loss": 0.49599212408065796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4231536388397217, "epoch": 4.17, "learning_rate": 2.0245139475908708e-05, "loss": 0.4616, "step": 4928, "task_loss": 0.1916409134864807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49538522958755493, "epoch": 4.17, "learning_rate": 2.0239101557782878e-05, "loss": 0.5214, "step": 4929, "task_loss": 0.7686262726783752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5811537504196167, "epoch": 4.17, "learning_rate": 2.0233063639657045e-05, "loss": 0.5252, "step": 4930, "task_loss": 0.23819835484027863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42883989214897156, "epoch": 4.17, "learning_rate": 2.0227025721531216e-05, "loss": 0.595, "step": 4931, "task_loss": 1.3238552808761597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6786895990371704, "epoch": 4.17, "learning_rate": 2.0220987803405387e-05, "loss": 0.667, "step": 4932, "task_loss": 1.0247772932052612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2992522418498993, "epoch": 4.17, "learning_rate": 2.0214949885279557e-05, "loss": 0.3871, "step": 4933, "task_loss": 0.5979833006858826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25604087114334106, "epoch": 4.17, "learning_rate": 2.0208911967153728e-05, "loss": 0.6609, "step": 4934, "task_loss": 0.09765611588954926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6350706219673157, "epoch": 4.17, "learning_rate": 2.0202874049027895e-05, "loss": 0.5367, "step": 4935, "task_loss": 0.7732728719711304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.17263920605182648, "epoch": 4.17, "learning_rate": 2.0196836130902065e-05, "loss": 0.4161, "step": 4936, "task_loss": 0.03403476998209953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38287174701690674, "epoch": 4.17, "learning_rate": 2.0190798212776236e-05, "loss": 0.4248, "step": 4937, "task_loss": 0.8116239309310913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48531174659729004, "epoch": 4.17, "learning_rate": 2.0184760294650407e-05, "loss": 0.5875, "step": 4938, "task_loss": 0.8728266954421997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2720785140991211, "epoch": 4.17, "learning_rate": 2.0178722376524574e-05, "loss": 0.6214, "step": 4939, "task_loss": 0.6443714499473572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5548807978630066, "epoch": 4.18, "learning_rate": 2.0172684458398744e-05, "loss": 0.5624, "step": 4940, "task_loss": 1.4689717292785645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4050292670726776, "epoch": 4.18, "learning_rate": 2.0166646540272915e-05, "loss": 0.3926, "step": 4941, "task_loss": 0.4794938862323761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33319228887557983, "epoch": 4.18, "learning_rate": 2.0160608622147085e-05, "loss": 0.4167, "step": 4942, "task_loss": 0.8639678359031677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5307009816169739, "epoch": 4.18, "learning_rate": 2.0154570704021256e-05, "loss": 0.6053, "step": 4943, "task_loss": 0.6365676522254944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34072375297546387, "epoch": 4.18, "learning_rate": 2.0148532785895423e-05, "loss": 0.4851, "step": 4944, "task_loss": 0.43860340118408203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6105844974517822, "epoch": 4.18, "learning_rate": 2.0142494867769594e-05, "loss": 0.4286, "step": 4945, "task_loss": 0.8512235879898071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3144579529762268, "epoch": 4.18, "learning_rate": 2.0136456949643764e-05, "loss": 0.5254, "step": 4946, "task_loss": 0.11859353631734848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4497188925743103, "epoch": 4.18, "learning_rate": 2.013041903151793e-05, "loss": 0.4552, "step": 4947, "task_loss": 0.5314205884933472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42237424850463867, "epoch": 4.18, "learning_rate": 2.0124381113392105e-05, "loss": 0.4107, "step": 4948, "task_loss": 0.5615251064300537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4527356028556824, "epoch": 4.18, "learning_rate": 2.0118343195266273e-05, "loss": 0.4336, "step": 4949, "task_loss": 1.429122805595398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.677120566368103, "epoch": 4.18, "learning_rate": 2.0112305277140443e-05, "loss": 0.5756, "step": 4950, "task_loss": 0.7396681308746338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2748467028141022, "epoch": 4.19, "learning_rate": 2.0106267359014614e-05, "loss": 0.4943, "step": 4951, "task_loss": 0.05191145837306976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38200289011001587, "epoch": 4.19, "learning_rate": 2.010022944088878e-05, "loss": 0.4552, "step": 4952, "task_loss": 0.6278354525566101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4642336666584015, "epoch": 4.19, "learning_rate": 2.0094191522762955e-05, "loss": 0.4749, "step": 4953, "task_loss": 1.0173221826553345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46911531686782837, "epoch": 4.19, "learning_rate": 2.0088153604637122e-05, "loss": 0.5151, "step": 4954, "task_loss": 0.9429162740707397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6079760789871216, "epoch": 4.19, "learning_rate": 2.008211568651129e-05, "loss": 0.6305, "step": 4955, "task_loss": 0.2932954430580139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0613927841186523, "epoch": 4.19, "learning_rate": 2.0076077768385463e-05, "loss": 0.6236, "step": 4956, "task_loss": 0.8749066591262817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3994763195514679, "epoch": 4.19, "learning_rate": 2.007003985025963e-05, "loss": 0.4237, "step": 4957, "task_loss": 0.1639009565114975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4584995210170746, "epoch": 4.19, "learning_rate": 2.00640019321338e-05, "loss": 0.5563, "step": 4958, "task_loss": 0.36930835247039795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38101595640182495, "epoch": 4.19, "learning_rate": 2.005796401400797e-05, "loss": 0.4647, "step": 4959, "task_loss": 0.29052144289016724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.581211507320404, "epoch": 4.19, "learning_rate": 2.005192609588214e-05, "loss": 0.3978, "step": 4960, "task_loss": 0.3513124883174896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25656163692474365, "epoch": 4.19, "learning_rate": 2.0045888177756313e-05, "loss": 0.4314, "step": 4961, "task_loss": 1.7514078617095947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7383733987808228, "epoch": 4.19, "learning_rate": 2.003985025963048e-05, "loss": 0.5809, "step": 4962, "task_loss": 0.9347806572914124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42711612582206726, "epoch": 4.2, "learning_rate": 2.003381234150465e-05, "loss": 0.6147, "step": 4963, "task_loss": 0.43052881956100464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34706443548202515, "epoch": 4.2, "learning_rate": 2.002777442337882e-05, "loss": 0.5872, "step": 4964, "task_loss": 0.6177386045455933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39574432373046875, "epoch": 4.2, "learning_rate": 2.0021736505252988e-05, "loss": 0.63, "step": 4965, "task_loss": 0.3420225977897644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43850213289260864, "epoch": 4.2, "learning_rate": 2.001569858712716e-05, "loss": 0.5818, "step": 4966, "task_loss": 0.9004715085029602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5186364650726318, "epoch": 4.2, "learning_rate": 2.000966066900133e-05, "loss": 0.6236, "step": 4967, "task_loss": 0.9594752788543701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24794182181358337, "epoch": 4.2, "learning_rate": 2.00036227508755e-05, "loss": 0.4956, "step": 4968, "task_loss": 0.5048964023590088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2958824634552002, "epoch": 4.2, "learning_rate": 1.999758483274967e-05, "loss": 0.6031, "step": 4969, "task_loss": 0.43613654375076294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9088079333305359, "epoch": 4.2, "learning_rate": 1.9991546914623838e-05, "loss": 0.7615, "step": 4970, "task_loss": 1.6905773878097534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4514431357383728, "epoch": 4.2, "learning_rate": 1.9985508996498008e-05, "loss": 0.3774, "step": 4971, "task_loss": 0.4469534754753113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4144880175590515, "epoch": 4.2, "learning_rate": 1.997947107837218e-05, "loss": 0.6766, "step": 4972, "task_loss": 0.7785089015960693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7056358456611633, "epoch": 4.2, "learning_rate": 1.997343316024635e-05, "loss": 0.6371, "step": 4973, "task_loss": 0.7132214903831482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5285404920578003, "epoch": 4.2, "learning_rate": 1.9967395242120517e-05, "loss": 0.4511, "step": 4974, "task_loss": 1.0623445510864258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3632991313934326, "epoch": 4.21, "learning_rate": 1.9961357323994687e-05, "loss": 0.5419, "step": 4975, "task_loss": 0.5024172067642212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4428093135356903, "epoch": 4.21, "learning_rate": 1.9955319405868858e-05, "loss": 0.5169, "step": 4976, "task_loss": 0.7164888381958008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6598308682441711, "epoch": 4.21, "learning_rate": 1.994928148774303e-05, "loss": 0.5541, "step": 4977, "task_loss": 1.258410930633545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7521320581436157, "epoch": 4.21, "learning_rate": 1.99432435696172e-05, "loss": 0.5877, "step": 4978, "task_loss": 0.3530876934528351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6055967807769775, "epoch": 4.21, "learning_rate": 1.9937205651491366e-05, "loss": 0.469, "step": 4979, "task_loss": 0.5825212597846985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37297290563583374, "epoch": 4.21, "learning_rate": 1.9931167733365537e-05, "loss": 0.5484, "step": 4980, "task_loss": 0.5703855752944946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4025610685348511, "epoch": 4.21, "learning_rate": 1.9925129815239707e-05, "loss": 0.7857, "step": 4981, "task_loss": 0.4340205490589142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.371643602848053, "epoch": 4.21, "learning_rate": 1.9919091897113874e-05, "loss": 0.3604, "step": 4982, "task_loss": 0.9145194888114929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5458828210830688, "epoch": 4.21, "learning_rate": 1.991305397898805e-05, "loss": 0.5892, "step": 4983, "task_loss": 0.7225217819213867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8708949089050293, "epoch": 4.21, "learning_rate": 1.9907016060862216e-05, "loss": 0.6921, "step": 4984, "task_loss": 1.502190351486206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47971537709236145, "epoch": 4.21, "learning_rate": 1.9900978142736386e-05, "loss": 0.423, "step": 4985, "task_loss": 0.708484411239624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44678863883018494, "epoch": 4.21, "learning_rate": 1.9894940224610557e-05, "loss": 0.6514, "step": 4986, "task_loss": 0.6540032625198364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3461977243423462, "epoch": 4.22, "learning_rate": 1.9888902306484724e-05, "loss": 0.4556, "step": 4987, "task_loss": 1.1603264808654785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7210103273391724, "epoch": 4.22, "learning_rate": 1.9882864388358894e-05, "loss": 0.5274, "step": 4988, "task_loss": 1.5134752988815308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33187419176101685, "epoch": 4.22, "learning_rate": 1.9876826470233065e-05, "loss": 0.5035, "step": 4989, "task_loss": 0.7476351857185364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.452039897441864, "epoch": 4.22, "learning_rate": 1.9870788552107232e-05, "loss": 0.5199, "step": 4990, "task_loss": 1.0907161235809326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24417778849601746, "epoch": 4.22, "learning_rate": 1.9864750633981406e-05, "loss": 0.4159, "step": 4991, "task_loss": 0.2452525645494461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42879533767700195, "epoch": 4.22, "learning_rate": 1.9858712715855573e-05, "loss": 0.5038, "step": 4992, "task_loss": 0.6371445059776306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3104240298271179, "epoch": 4.22, "learning_rate": 1.9852674797729744e-05, "loss": 0.4537, "step": 4993, "task_loss": 0.34255972504615784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5890994071960449, "epoch": 4.22, "learning_rate": 1.9846636879603914e-05, "loss": 0.4542, "step": 4994, "task_loss": 1.377626895904541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5061273574829102, "epoch": 4.22, "learning_rate": 1.984059896147808e-05, "loss": 0.4727, "step": 4995, "task_loss": 0.7576563954353333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33526724576950073, "epoch": 4.22, "learning_rate": 1.9834561043352252e-05, "loss": 0.5027, "step": 4996, "task_loss": 0.5246338844299316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5494797825813293, "epoch": 4.22, "learning_rate": 1.9828523125226423e-05, "loss": 0.6421, "step": 4997, "task_loss": 0.9772250652313232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5553446412086487, "epoch": 4.22, "learning_rate": 1.9822485207100593e-05, "loss": 0.5829, "step": 4998, "task_loss": 0.22669434547424316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4884990453720093, "epoch": 4.23, "learning_rate": 1.9816447288974764e-05, "loss": 0.4299, "step": 4999, "task_loss": 0.7669544219970703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47162771224975586, "epoch": 4.23, "learning_rate": 1.981040937084893e-05, "loss": 0.5348, "step": 5000, "task_loss": 0.9524763226509094 }, { "epoch": 4.23, "eval_accuracy": 0.9006336633663367, "eval_loss": 0.3424574136734009, "eval_runtime": 227.1205, "eval_samples_per_second": 111.174, "eval_steps_per_second": 0.872, "step": 5000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.532374382019043, "epoch": 4.23, "learning_rate": 1.9804371452723102e-05, "loss": 0.5458, "step": 5001, "task_loss": 0.7421075105667114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3753403425216675, "epoch": 4.23, "learning_rate": 1.9798333534597272e-05, "loss": 0.6103, "step": 5002, "task_loss": 0.24701862037181854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2989429831504822, "epoch": 4.23, "learning_rate": 1.9792295616471443e-05, "loss": 0.4956, "step": 5003, "task_loss": 0.4482909142971039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.622974693775177, "epoch": 4.23, "learning_rate": 1.978625769834561e-05, "loss": 0.5782, "step": 5004, "task_loss": 0.6856787800788879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.312394380569458, "epoch": 4.23, "learning_rate": 1.978021978021978e-05, "loss": 0.3891, "step": 5005, "task_loss": 0.27638155221939087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5630203485488892, "epoch": 4.23, "learning_rate": 1.977418186209395e-05, "loss": 0.5441, "step": 5006, "task_loss": 0.21401074528694153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4234447777271271, "epoch": 4.23, "learning_rate": 1.9768143943968122e-05, "loss": 0.3912, "step": 5007, "task_loss": 0.7603311538696289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3590165972709656, "epoch": 4.23, "learning_rate": 1.9762106025842292e-05, "loss": 0.4712, "step": 5008, "task_loss": 0.07598962634801865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38335761427879333, "epoch": 4.23, "learning_rate": 1.975606810771646e-05, "loss": 0.623, "step": 5009, "task_loss": 0.23930713534355164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5075116753578186, "epoch": 4.23, "learning_rate": 1.975003018959063e-05, "loss": 0.5581, "step": 5010, "task_loss": 0.7713028192520142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4468742311000824, "epoch": 4.24, "learning_rate": 1.97439922714648e-05, "loss": 0.426, "step": 5011, "task_loss": 0.6361373662948608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42873284220695496, "epoch": 4.24, "learning_rate": 1.9737954353338968e-05, "loss": 0.4742, "step": 5012, "task_loss": 0.8471259474754333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3435823917388916, "epoch": 4.24, "learning_rate": 1.9731916435213142e-05, "loss": 0.4364, "step": 5013, "task_loss": 0.5504423379898071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47085511684417725, "epoch": 4.24, "learning_rate": 1.972587851708731e-05, "loss": 0.5782, "step": 5014, "task_loss": 0.5790916681289673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37175899744033813, "epoch": 4.24, "learning_rate": 1.971984059896148e-05, "loss": 0.4701, "step": 5015, "task_loss": 0.3965161442756653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3714584410190582, "epoch": 4.24, "learning_rate": 1.971380268083565e-05, "loss": 0.5646, "step": 5016, "task_loss": 0.4959770143032074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3377198576927185, "epoch": 4.24, "learning_rate": 1.9707764762709817e-05, "loss": 0.5466, "step": 5017, "task_loss": 1.0765025615692139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3954613208770752, "epoch": 4.24, "learning_rate": 1.970172684458399e-05, "loss": 0.5697, "step": 5018, "task_loss": 0.3766115605831146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6157458424568176, "epoch": 4.24, "learning_rate": 1.969568892645816e-05, "loss": 0.5446, "step": 5019, "task_loss": 1.2350085973739624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5992120504379272, "epoch": 4.24, "learning_rate": 1.9689651008332326e-05, "loss": 0.412, "step": 5020, "task_loss": 0.3953227698802948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32933223247528076, "epoch": 4.24, "learning_rate": 1.96836130902065e-05, "loss": 0.4416, "step": 5021, "task_loss": 0.45828381180763245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.405678391456604, "epoch": 4.24, "learning_rate": 1.9677575172080667e-05, "loss": 0.5131, "step": 5022, "task_loss": 1.3066145181655884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45870906114578247, "epoch": 4.25, "learning_rate": 1.9671537253954837e-05, "loss": 0.343, "step": 5023, "task_loss": 0.49534887075424194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36197078227996826, "epoch": 4.25, "learning_rate": 1.9665499335829008e-05, "loss": 0.5431, "step": 5024, "task_loss": 0.37182527780532837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5389375686645508, "epoch": 4.25, "learning_rate": 1.9659461417703175e-05, "loss": 0.5633, "step": 5025, "task_loss": 0.6920986175537109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6336544752120972, "epoch": 4.25, "learning_rate": 1.965342349957735e-05, "loss": 0.6008, "step": 5026, "task_loss": 0.3556116819381714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5226563215255737, "epoch": 4.25, "learning_rate": 1.9647385581451516e-05, "loss": 0.577, "step": 5027, "task_loss": 0.35030683875083923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5266939401626587, "epoch": 4.25, "learning_rate": 1.9641347663325683e-05, "loss": 0.466, "step": 5028, "task_loss": 0.7201595306396484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.635701060295105, "epoch": 4.25, "learning_rate": 1.9635309745199857e-05, "loss": 0.5099, "step": 5029, "task_loss": 1.4238333702087402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42528796195983887, "epoch": 4.25, "learning_rate": 1.9629271827074025e-05, "loss": 0.4293, "step": 5030, "task_loss": 0.8699221611022949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5875149965286255, "epoch": 4.25, "learning_rate": 1.9623233908948195e-05, "loss": 0.6672, "step": 5031, "task_loss": 0.960726261138916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3001979887485504, "epoch": 4.25, "learning_rate": 1.9617195990822366e-05, "loss": 0.4146, "step": 5032, "task_loss": 0.08719417452812195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4166693389415741, "epoch": 4.25, "learning_rate": 1.9611158072696533e-05, "loss": 0.4172, "step": 5033, "task_loss": 0.7476599216461182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6274157762527466, "epoch": 4.26, "learning_rate": 1.9605120154570707e-05, "loss": 0.501, "step": 5034, "task_loss": 0.7170937657356262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22525671124458313, "epoch": 4.26, "learning_rate": 1.9599082236444874e-05, "loss": 0.5107, "step": 5035, "task_loss": 0.3120877742767334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4647220969200134, "epoch": 4.26, "learning_rate": 1.9593044318319045e-05, "loss": 0.4565, "step": 5036, "task_loss": 0.9033588171005249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.2947896718978882, "epoch": 4.26, "learning_rate": 1.9587006400193215e-05, "loss": 0.8977, "step": 5037, "task_loss": 0.5642430782318115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6388611793518066, "epoch": 4.26, "learning_rate": 1.9580968482067382e-05, "loss": 0.602, "step": 5038, "task_loss": 0.47370365262031555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5020397305488586, "epoch": 4.26, "learning_rate": 1.9574930563941553e-05, "loss": 0.6733, "step": 5039, "task_loss": 0.6022677421569824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4384838044643402, "epoch": 4.26, "learning_rate": 1.9568892645815723e-05, "loss": 0.4635, "step": 5040, "task_loss": 0.3759649693965912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5018907189369202, "epoch": 4.26, "learning_rate": 1.9562854727689894e-05, "loss": 0.3896, "step": 5041, "task_loss": 0.36943864822387695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38962697982788086, "epoch": 4.26, "learning_rate": 1.9556816809564065e-05, "loss": 0.4285, "step": 5042, "task_loss": 0.556939959526062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2982959449291229, "epoch": 4.26, "learning_rate": 1.9550778891438232e-05, "loss": 0.4348, "step": 5043, "task_loss": 0.3856349587440491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8032476305961609, "epoch": 4.26, "learning_rate": 1.9544740973312402e-05, "loss": 0.5482, "step": 5044, "task_loss": 0.25123676657676697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4885512888431549, "epoch": 4.26, "learning_rate": 1.9538703055186573e-05, "loss": 0.4219, "step": 5045, "task_loss": 0.24022117257118225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4221011698246002, "epoch": 4.27, "learning_rate": 1.9532665137060744e-05, "loss": 0.3614, "step": 5046, "task_loss": 0.7391841411590576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4160241186618805, "epoch": 4.27, "learning_rate": 1.952662721893491e-05, "loss": 0.5984, "step": 5047, "task_loss": 0.40984421968460083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5365934371948242, "epoch": 4.27, "learning_rate": 1.952058930080908e-05, "loss": 0.5665, "step": 5048, "task_loss": 0.5471963882446289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7437394261360168, "epoch": 4.27, "learning_rate": 1.9514551382683252e-05, "loss": 0.5564, "step": 5049, "task_loss": 0.6211931109428406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42773520946502686, "epoch": 4.27, "learning_rate": 1.9508513464557422e-05, "loss": 0.3929, "step": 5050, "task_loss": 0.8035658597946167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39322546124458313, "epoch": 4.27, "learning_rate": 1.9502475546431593e-05, "loss": 0.6138, "step": 5051, "task_loss": 0.703641414642334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5219629406929016, "epoch": 4.27, "learning_rate": 1.949643762830576e-05, "loss": 0.6015, "step": 5052, "task_loss": 0.25942450761795044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6487494707107544, "epoch": 4.27, "learning_rate": 1.949039971017993e-05, "loss": 0.6842, "step": 5053, "task_loss": 0.9145632386207581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7413905262947083, "epoch": 4.27, "learning_rate": 1.94843617920541e-05, "loss": 0.5048, "step": 5054, "task_loss": 1.3290339708328247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7212687134742737, "epoch": 4.27, "learning_rate": 1.947832387392827e-05, "loss": 0.6062, "step": 5055, "task_loss": 0.7027802467346191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8545883893966675, "epoch": 4.27, "learning_rate": 1.9472285955802442e-05, "loss": 0.5267, "step": 5056, "task_loss": 1.0202313661575317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0464457273483276, "epoch": 4.27, "learning_rate": 1.946624803767661e-05, "loss": 0.6722, "step": 5057, "task_loss": 1.4713572263717651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4243375360965729, "epoch": 4.28, "learning_rate": 1.946021011955078e-05, "loss": 0.4315, "step": 5058, "task_loss": 0.29880428314208984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6840518116950989, "epoch": 4.28, "learning_rate": 1.945417220142495e-05, "loss": 0.7085, "step": 5059, "task_loss": 0.4467519223690033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6907805800437927, "epoch": 4.28, "learning_rate": 1.9448134283299118e-05, "loss": 0.5925, "step": 5060, "task_loss": 0.8569194674491882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3326148986816406, "epoch": 4.28, "learning_rate": 1.944209636517329e-05, "loss": 0.5376, "step": 5061, "task_loss": 0.3716012239456177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3155145049095154, "epoch": 4.28, "learning_rate": 1.943605844704746e-05, "loss": 0.5509, "step": 5062, "task_loss": 0.9305038452148438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44722580909729004, "epoch": 4.28, "learning_rate": 1.9430020528921626e-05, "loss": 0.8844, "step": 5063, "task_loss": 0.3477742373943329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6165612936019897, "epoch": 4.28, "learning_rate": 1.94239826107958e-05, "loss": 0.5751, "step": 5064, "task_loss": 0.6828938126564026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5767428278923035, "epoch": 4.28, "learning_rate": 1.9417944692669967e-05, "loss": 0.516, "step": 5065, "task_loss": 0.46416810154914856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4867111146450043, "epoch": 4.28, "learning_rate": 1.9411906774544138e-05, "loss": 0.5893, "step": 5066, "task_loss": 0.9872850179672241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4123658835887909, "epoch": 4.28, "learning_rate": 1.940586885641831e-05, "loss": 0.4523, "step": 5067, "task_loss": 0.30360570549964905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5846825838088989, "epoch": 4.28, "learning_rate": 1.9399830938292476e-05, "loss": 0.5028, "step": 5068, "task_loss": 1.1648516654968262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6611562967300415, "epoch": 4.28, "learning_rate": 1.9393793020166646e-05, "loss": 0.5424, "step": 5069, "task_loss": 0.8596183061599731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7998292446136475, "epoch": 4.29, "learning_rate": 1.9387755102040817e-05, "loss": 0.6471, "step": 5070, "task_loss": 1.4393681287765503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35797637701034546, "epoch": 4.29, "learning_rate": 1.9381717183914987e-05, "loss": 0.3737, "step": 5071, "task_loss": 0.06246805191040039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.530833899974823, "epoch": 4.29, "learning_rate": 1.9375679265789158e-05, "loss": 0.5295, "step": 5072, "task_loss": 0.5695270895957947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.736621081829071, "epoch": 4.29, "learning_rate": 1.9369641347663325e-05, "loss": 0.8477, "step": 5073, "task_loss": 0.6966522932052612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7256484031677246, "epoch": 4.29, "learning_rate": 1.9363603429537496e-05, "loss": 0.5734, "step": 5074, "task_loss": 0.4963006377220154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6662230491638184, "epoch": 4.29, "learning_rate": 1.9357565511411666e-05, "loss": 0.5937, "step": 5075, "task_loss": 0.9909964203834534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.364436537027359, "epoch": 4.29, "learning_rate": 1.9351527593285837e-05, "loss": 0.4042, "step": 5076, "task_loss": 0.25013086199760437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.541681170463562, "epoch": 4.29, "learning_rate": 1.9345489675160004e-05, "loss": 0.5675, "step": 5077, "task_loss": 0.5219549536705017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29979416728019714, "epoch": 4.29, "learning_rate": 1.9339451757034175e-05, "loss": 0.3981, "step": 5078, "task_loss": 0.6289870142936707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7802372574806213, "epoch": 4.29, "learning_rate": 1.9333413838908345e-05, "loss": 0.6249, "step": 5079, "task_loss": 0.9983679056167603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3775298297405243, "epoch": 4.29, "learning_rate": 1.9327375920782516e-05, "loss": 0.4229, "step": 5080, "task_loss": 0.46403446793556213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4789639711380005, "epoch": 4.29, "learning_rate": 1.9321338002656686e-05, "loss": 0.4558, "step": 5081, "task_loss": 0.7437804341316223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3236818015575409, "epoch": 4.3, "learning_rate": 1.9315300084530854e-05, "loss": 0.4585, "step": 5082, "task_loss": 0.43122851848602295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.415682315826416, "epoch": 4.3, "learning_rate": 1.9309262166405024e-05, "loss": 0.6093, "step": 5083, "task_loss": 0.6977458596229553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47659558057785034, "epoch": 4.3, "learning_rate": 1.9303224248279195e-05, "loss": 0.5115, "step": 5084, "task_loss": 0.8346410393714905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.14233285188674927, "epoch": 4.3, "learning_rate": 1.9297186330153362e-05, "loss": 0.345, "step": 5085, "task_loss": 0.01219052542001009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7001383304595947, "epoch": 4.3, "learning_rate": 1.9291148412027536e-05, "loss": 0.5325, "step": 5086, "task_loss": 0.373574435710907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7093246579170227, "epoch": 4.3, "learning_rate": 1.9285110493901703e-05, "loss": 0.6403, "step": 5087, "task_loss": 1.1015516519546509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30475446581840515, "epoch": 4.3, "learning_rate": 1.9279072575775874e-05, "loss": 0.4312, "step": 5088, "task_loss": 0.6931011080741882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5989788174629211, "epoch": 4.3, "learning_rate": 1.9273034657650044e-05, "loss": 0.5142, "step": 5089, "task_loss": 0.5983972549438477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4142167568206787, "epoch": 4.3, "learning_rate": 1.926699673952421e-05, "loss": 0.4006, "step": 5090, "task_loss": 0.6432098150253296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45670264959335327, "epoch": 4.3, "learning_rate": 1.9260958821398385e-05, "loss": 0.4902, "step": 5091, "task_loss": 0.664253830909729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37471523880958557, "epoch": 4.3, "learning_rate": 1.9254920903272553e-05, "loss": 0.5366, "step": 5092, "task_loss": 0.2518182098865509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5968806743621826, "epoch": 4.3, "learning_rate": 1.924888298514672e-05, "loss": 0.5366, "step": 5093, "task_loss": 1.1582841873168945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5674070715904236, "epoch": 4.31, "learning_rate": 1.9242845067020894e-05, "loss": 0.4621, "step": 5094, "task_loss": 0.767611026763916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47268348932266235, "epoch": 4.31, "learning_rate": 1.923680714889506e-05, "loss": 0.4878, "step": 5095, "task_loss": 0.47463980317115784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35396715998649597, "epoch": 4.31, "learning_rate": 1.923076923076923e-05, "loss": 0.4539, "step": 5096, "task_loss": 0.9898560047149658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40556108951568604, "epoch": 4.31, "learning_rate": 1.9224731312643402e-05, "loss": 0.3564, "step": 5097, "task_loss": 0.06661086529493332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5862468481063843, "epoch": 4.31, "learning_rate": 1.921869339451757e-05, "loss": 0.689, "step": 5098, "task_loss": 0.9265633821487427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4073794484138489, "epoch": 4.31, "learning_rate": 1.9212655476391743e-05, "loss": 0.6769, "step": 5099, "task_loss": 0.9757845401763916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3788613975048065, "epoch": 4.31, "learning_rate": 1.920661755826591e-05, "loss": 0.5227, "step": 5100, "task_loss": 1.009295105934143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5585248470306396, "epoch": 4.31, "learning_rate": 1.920057964014008e-05, "loss": 0.597, "step": 5101, "task_loss": 1.3039665222167969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4124269485473633, "epoch": 4.31, "learning_rate": 1.919454172201425e-05, "loss": 0.4038, "step": 5102, "task_loss": 0.18576285243034363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5707939863204956, "epoch": 4.31, "learning_rate": 1.918850380388842e-05, "loss": 0.6589, "step": 5103, "task_loss": 1.527671217918396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3180033564567566, "epoch": 4.31, "learning_rate": 1.918246588576259e-05, "loss": 0.5257, "step": 5104, "task_loss": 0.6055582165718079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6053884625434875, "epoch": 4.32, "learning_rate": 1.917642796763676e-05, "loss": 0.544, "step": 5105, "task_loss": 0.9133829474449158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5316344499588013, "epoch": 4.32, "learning_rate": 1.917039004951093e-05, "loss": 0.4938, "step": 5106, "task_loss": 1.6233495473861694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5007742047309875, "epoch": 4.32, "learning_rate": 1.91643521313851e-05, "loss": 0.4212, "step": 5107, "task_loss": 0.6284265518188477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2910877764225006, "epoch": 4.32, "learning_rate": 1.9158314213259268e-05, "loss": 0.3779, "step": 5108, "task_loss": 0.7947986125946045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8732811212539673, "epoch": 4.32, "learning_rate": 1.915227629513344e-05, "loss": 0.5189, "step": 5109, "task_loss": 0.6603541374206543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.00429105758667, "epoch": 4.32, "learning_rate": 1.914623837700761e-05, "loss": 0.5992, "step": 5110, "task_loss": 0.8881577849388123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6181060075759888, "epoch": 4.32, "learning_rate": 1.914020045888178e-05, "loss": 0.497, "step": 5111, "task_loss": 1.317571997642517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6937109231948853, "epoch": 4.32, "learning_rate": 1.9134162540755947e-05, "loss": 0.489, "step": 5112, "task_loss": 0.6713109016418457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7358061671257019, "epoch": 4.32, "learning_rate": 1.9128124622630118e-05, "loss": 0.5367, "step": 5113, "task_loss": 0.7201684713363647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6936349868774414, "epoch": 4.32, "learning_rate": 1.9122086704504288e-05, "loss": 0.5367, "step": 5114, "task_loss": 0.6637175679206848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6068978309631348, "epoch": 4.32, "learning_rate": 1.911604878637846e-05, "loss": 0.5031, "step": 5115, "task_loss": 0.26968470215797424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40736767649650574, "epoch": 4.32, "learning_rate": 1.911001086825263e-05, "loss": 0.5261, "step": 5116, "task_loss": 0.5758113861083984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4408773183822632, "epoch": 4.33, "learning_rate": 1.9103972950126796e-05, "loss": 0.4445, "step": 5117, "task_loss": 0.4235752820968628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44996312260627747, "epoch": 4.33, "learning_rate": 1.9097935032000967e-05, "loss": 0.4421, "step": 5118, "task_loss": 0.4323626756668091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6320191025733948, "epoch": 4.33, "learning_rate": 1.9091897113875138e-05, "loss": 0.6713, "step": 5119, "task_loss": 0.23968547582626343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6504097580909729, "epoch": 4.33, "learning_rate": 1.9085859195749305e-05, "loss": 0.5741, "step": 5120, "task_loss": 0.6169661283493042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4878043234348297, "epoch": 4.33, "learning_rate": 1.907982127762348e-05, "loss": 0.4876, "step": 5121, "task_loss": 0.863811731338501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46960264444351196, "epoch": 4.33, "learning_rate": 1.9073783359497646e-05, "loss": 0.568, "step": 5122, "task_loss": 1.1279407739639282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7212247252464294, "epoch": 4.33, "learning_rate": 1.9067745441371817e-05, "loss": 0.5578, "step": 5123, "task_loss": 1.0002729892730713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5766572952270508, "epoch": 4.33, "learning_rate": 1.9061707523245987e-05, "loss": 0.3965, "step": 5124, "task_loss": 0.5978819131851196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5018870830535889, "epoch": 4.33, "learning_rate": 1.9055669605120154e-05, "loss": 0.4307, "step": 5125, "task_loss": 0.843283474445343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43089205026626587, "epoch": 4.33, "learning_rate": 1.9049631686994325e-05, "loss": 0.4732, "step": 5126, "task_loss": 1.351424217224121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.525310754776001, "epoch": 4.33, "learning_rate": 1.9043593768868495e-05, "loss": 0.5845, "step": 5127, "task_loss": 0.8180626034736633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5770017504692078, "epoch": 4.33, "learning_rate": 1.9037555850742663e-05, "loss": 0.5818, "step": 5128, "task_loss": 1.3524112701416016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8728530406951904, "epoch": 4.34, "learning_rate": 1.9031517932616837e-05, "loss": 0.5867, "step": 5129, "task_loss": 0.9379435181617737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30259817838668823, "epoch": 4.34, "learning_rate": 1.9025480014491004e-05, "loss": 0.4369, "step": 5130, "task_loss": 0.5340809226036072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32282328605651855, "epoch": 4.34, "learning_rate": 1.9019442096365174e-05, "loss": 0.4358, "step": 5131, "task_loss": 0.8354558944702148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5996333956718445, "epoch": 4.34, "learning_rate": 1.9013404178239345e-05, "loss": 0.5105, "step": 5132, "task_loss": 1.0537067651748657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3505741357803345, "epoch": 4.34, "learning_rate": 1.9007366260113512e-05, "loss": 0.4788, "step": 5133, "task_loss": 0.8713359832763672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4501539468765259, "epoch": 4.34, "learning_rate": 1.9001328341987683e-05, "loss": 0.6639, "step": 5134, "task_loss": 0.597876787185669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44056135416030884, "epoch": 4.34, "learning_rate": 1.8995290423861853e-05, "loss": 0.6057, "step": 5135, "task_loss": 0.3981756269931793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29598724842071533, "epoch": 4.34, "learning_rate": 1.8989252505736024e-05, "loss": 0.3804, "step": 5136, "task_loss": 0.1258758306503296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4103579521179199, "epoch": 4.34, "learning_rate": 1.8983214587610194e-05, "loss": 0.4197, "step": 5137, "task_loss": 0.6493760943412781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.632029116153717, "epoch": 4.34, "learning_rate": 1.897717666948436e-05, "loss": 0.5157, "step": 5138, "task_loss": 0.593542754650116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3973308205604553, "epoch": 4.34, "learning_rate": 1.8971138751358532e-05, "loss": 0.5195, "step": 5139, "task_loss": 0.42211097478866577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2950882911682129, "epoch": 4.34, "learning_rate": 1.8965100833232703e-05, "loss": 0.3578, "step": 5140, "task_loss": 0.1372232586145401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.676523745059967, "epoch": 4.35, "learning_rate": 1.8959062915106873e-05, "loss": 0.6681, "step": 5141, "task_loss": 0.6642469763755798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5017623901367188, "epoch": 4.35, "learning_rate": 1.895302499698104e-05, "loss": 0.413, "step": 5142, "task_loss": 0.8043433427810669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34978625178337097, "epoch": 4.35, "learning_rate": 1.894698707885521e-05, "loss": 0.3961, "step": 5143, "task_loss": 0.552386462688446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2580888867378235, "epoch": 4.35, "learning_rate": 1.894094916072938e-05, "loss": 0.4161, "step": 5144, "task_loss": 0.0676063820719719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.367387056350708, "epoch": 4.35, "learning_rate": 1.8934911242603552e-05, "loss": 0.4663, "step": 5145, "task_loss": 0.8344635367393494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7152042984962463, "epoch": 4.35, "learning_rate": 1.8928873324477723e-05, "loss": 0.5356, "step": 5146, "task_loss": 0.8455249667167664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3174892067909241, "epoch": 4.35, "learning_rate": 1.892283540635189e-05, "loss": 0.4547, "step": 5147, "task_loss": 0.14665673673152924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3568580746650696, "epoch": 4.35, "learning_rate": 1.891679748822606e-05, "loss": 0.5103, "step": 5148, "task_loss": 0.40141525864601135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5586023330688477, "epoch": 4.35, "learning_rate": 1.891075957010023e-05, "loss": 0.6699, "step": 5149, "task_loss": 0.7082034349441528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7683988213539124, "epoch": 4.35, "learning_rate": 1.8904721651974398e-05, "loss": 0.6165, "step": 5150, "task_loss": 0.7438834309577942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5254579782485962, "epoch": 4.35, "learning_rate": 1.8898683733848572e-05, "loss": 0.4535, "step": 5151, "task_loss": 0.8049620389938354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.468134343624115, "epoch": 4.35, "learning_rate": 1.889264581572274e-05, "loss": 0.4523, "step": 5152, "task_loss": 1.2789943218231201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6025068759918213, "epoch": 4.36, "learning_rate": 1.888660789759691e-05, "loss": 0.605, "step": 5153, "task_loss": 1.0537078380584717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.373046875, "epoch": 4.36, "learning_rate": 1.888056997947108e-05, "loss": 0.4383, "step": 5154, "task_loss": 1.196982979774475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7268447279930115, "epoch": 4.36, "learning_rate": 1.8874532061345248e-05, "loss": 0.5752, "step": 5155, "task_loss": 0.4414595067501068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2773147225379944, "epoch": 4.36, "learning_rate": 1.886849414321942e-05, "loss": 0.531, "step": 5156, "task_loss": 1.1413606405258179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9555575847625732, "epoch": 4.36, "learning_rate": 1.886245622509359e-05, "loss": 0.6351, "step": 5157, "task_loss": 1.3631845712661743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42526522278785706, "epoch": 4.36, "learning_rate": 1.8856418306967756e-05, "loss": 0.5417, "step": 5158, "task_loss": 0.41887953877449036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8114304542541504, "epoch": 4.36, "learning_rate": 1.885038038884193e-05, "loss": 0.587, "step": 5159, "task_loss": 0.6104749441146851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30991843342781067, "epoch": 4.36, "learning_rate": 1.8844342470716097e-05, "loss": 0.6882, "step": 5160, "task_loss": 0.659593403339386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.637170135974884, "epoch": 4.36, "learning_rate": 1.8838304552590268e-05, "loss": 0.6361, "step": 5161, "task_loss": 0.5559813976287842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26045629382133484, "epoch": 4.36, "learning_rate": 1.8832266634464438e-05, "loss": 0.38, "step": 5162, "task_loss": 0.0815131887793541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34749385714530945, "epoch": 4.36, "learning_rate": 1.8826228716338605e-05, "loss": 0.539, "step": 5163, "task_loss": 0.39933592081069946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.727149248123169, "epoch": 4.36, "learning_rate": 1.882019079821278e-05, "loss": 0.6543, "step": 5164, "task_loss": 1.1992418766021729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3597978949546814, "epoch": 4.37, "learning_rate": 1.8814152880086947e-05, "loss": 0.5455, "step": 5165, "task_loss": 1.1979615688323975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6934961080551147, "epoch": 4.37, "learning_rate": 1.8808114961961117e-05, "loss": 0.5409, "step": 5166, "task_loss": 1.4606928825378418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6971839666366577, "epoch": 4.37, "learning_rate": 1.8802077043835288e-05, "loss": 0.5805, "step": 5167, "task_loss": 1.1718096733093262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3281139135360718, "epoch": 4.37, "learning_rate": 1.8796039125709455e-05, "loss": 0.3893, "step": 5168, "task_loss": 0.43466925621032715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.589095950126648, "epoch": 4.37, "learning_rate": 1.8790001207583626e-05, "loss": 0.4319, "step": 5169, "task_loss": 0.7735646367073059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5194735527038574, "epoch": 4.37, "learning_rate": 1.8783963289457796e-05, "loss": 0.4677, "step": 5170, "task_loss": 0.622076153755188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6105556488037109, "epoch": 4.37, "learning_rate": 1.8777925371331967e-05, "loss": 0.6896, "step": 5171, "task_loss": 1.3013780117034912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.576444685459137, "epoch": 4.37, "learning_rate": 1.8771887453206137e-05, "loss": 0.3812, "step": 5172, "task_loss": 0.7041521668434143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6930994391441345, "epoch": 4.37, "learning_rate": 1.8765849535080304e-05, "loss": 0.5149, "step": 5173, "task_loss": 0.5136037468910217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32187363505363464, "epoch": 4.37, "learning_rate": 1.8759811616954475e-05, "loss": 0.5509, "step": 5174, "task_loss": 0.19670556485652924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.63991379737854, "epoch": 4.37, "learning_rate": 1.8753773698828646e-05, "loss": 0.4788, "step": 5175, "task_loss": 1.519219160079956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4820733070373535, "epoch": 4.38, "learning_rate": 1.8747735780702816e-05, "loss": 0.4932, "step": 5176, "task_loss": 1.5110890865325928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6874973177909851, "epoch": 4.38, "learning_rate": 1.8741697862576983e-05, "loss": 0.5824, "step": 5177, "task_loss": 0.21568679809570312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6455018520355225, "epoch": 4.38, "learning_rate": 1.8735659944451154e-05, "loss": 0.5614, "step": 5178, "task_loss": 0.8208565711975098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37172046303749084, "epoch": 4.38, "learning_rate": 1.8729622026325324e-05, "loss": 0.4298, "step": 5179, "task_loss": 0.4251770079135895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35570043325424194, "epoch": 4.38, "learning_rate": 1.8723584108199495e-05, "loss": 0.4928, "step": 5180, "task_loss": 0.6891517639160156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4183785319328308, "epoch": 4.38, "learning_rate": 1.8717546190073666e-05, "loss": 0.502, "step": 5181, "task_loss": 0.35888147354125977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44515299797058105, "epoch": 4.38, "learning_rate": 1.8711508271947833e-05, "loss": 0.5921, "step": 5182, "task_loss": 1.1373136043548584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5336998701095581, "epoch": 4.38, "learning_rate": 1.8705470353822003e-05, "loss": 0.6191, "step": 5183, "task_loss": 0.805208146572113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7903681397438049, "epoch": 4.38, "learning_rate": 1.8699432435696174e-05, "loss": 0.7983, "step": 5184, "task_loss": 0.9237998723983765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3902924358844757, "epoch": 4.38, "learning_rate": 1.869339451757034e-05, "loss": 0.4596, "step": 5185, "task_loss": 0.2815991938114166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6060531139373779, "epoch": 4.38, "learning_rate": 1.8687356599444515e-05, "loss": 0.4206, "step": 5186, "task_loss": 0.7269940972328186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3886060118675232, "epoch": 4.38, "learning_rate": 1.8681318681318682e-05, "loss": 0.4631, "step": 5187, "task_loss": 1.064745306968689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.741595983505249, "epoch": 4.39, "learning_rate": 1.8675280763192853e-05, "loss": 0.5722, "step": 5188, "task_loss": 0.7561153173446655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5882607698440552, "epoch": 4.39, "learning_rate": 1.8669242845067023e-05, "loss": 0.5243, "step": 5189, "task_loss": 0.12406136095523834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7448962926864624, "epoch": 4.39, "learning_rate": 1.866320492694119e-05, "loss": 0.6412, "step": 5190, "task_loss": 0.9993990063667297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25048762559890747, "epoch": 4.39, "learning_rate": 1.865716700881536e-05, "loss": 0.3871, "step": 5191, "task_loss": 0.2131929248571396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46440568566322327, "epoch": 4.39, "learning_rate": 1.8651129090689532e-05, "loss": 0.4771, "step": 5192, "task_loss": 1.067097544670105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4386768937110901, "epoch": 4.39, "learning_rate": 1.86450911725637e-05, "loss": 0.4248, "step": 5193, "task_loss": 0.8405083417892456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40268099308013916, "epoch": 4.39, "learning_rate": 1.8639053254437873e-05, "loss": 0.4916, "step": 5194, "task_loss": 0.8314642906188965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30733591318130493, "epoch": 4.39, "learning_rate": 1.863301533631204e-05, "loss": 0.3983, "step": 5195, "task_loss": 0.04206152260303497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5220547914505005, "epoch": 4.39, "learning_rate": 1.862697741818621e-05, "loss": 0.718, "step": 5196, "task_loss": 0.27213263511657715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.522191047668457, "epoch": 4.39, "learning_rate": 1.862093950006038e-05, "loss": 0.3941, "step": 5197, "task_loss": 0.5888574719429016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5882545113563538, "epoch": 4.39, "learning_rate": 1.861490158193455e-05, "loss": 0.5459, "step": 5198, "task_loss": 0.6874248385429382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7507486343383789, "epoch": 4.39, "learning_rate": 1.860886366380872e-05, "loss": 0.6069, "step": 5199, "task_loss": 1.7811335325241089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6033719182014465, "epoch": 4.4, "learning_rate": 1.860282574568289e-05, "loss": 0.5418, "step": 5200, "task_loss": 0.39259931445121765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35461854934692383, "epoch": 4.4, "learning_rate": 1.8596787827557057e-05, "loss": 0.4779, "step": 5201, "task_loss": 1.1613352298736572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5100465416908264, "epoch": 4.4, "learning_rate": 1.859074990943123e-05, "loss": 0.4774, "step": 5202, "task_loss": 0.6522606611251831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3180009424686432, "epoch": 4.4, "learning_rate": 1.8584711991305398e-05, "loss": 0.3993, "step": 5203, "task_loss": 0.7879483699798584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7164030075073242, "epoch": 4.4, "learning_rate": 1.857867407317957e-05, "loss": 0.6032, "step": 5204, "task_loss": 1.008669376373291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6012774705886841, "epoch": 4.4, "learning_rate": 1.857263615505374e-05, "loss": 0.6541, "step": 5205, "task_loss": 0.9772782325744629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35195499658584595, "epoch": 4.4, "learning_rate": 1.8566598236927906e-05, "loss": 0.4864, "step": 5206, "task_loss": 0.39875492453575134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47787153720855713, "epoch": 4.4, "learning_rate": 1.8560560318802077e-05, "loss": 0.4436, "step": 5207, "task_loss": 0.7053685784339905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6070903539657593, "epoch": 4.4, "learning_rate": 1.8554522400676247e-05, "loss": 0.47, "step": 5208, "task_loss": 0.9445880651473999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7124163508415222, "epoch": 4.4, "learning_rate": 1.8548484482550418e-05, "loss": 0.4772, "step": 5209, "task_loss": 0.5998092293739319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5284239649772644, "epoch": 4.4, "learning_rate": 1.854244656442459e-05, "loss": 0.6413, "step": 5210, "task_loss": 0.7293675541877747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4566672742366791, "epoch": 4.4, "learning_rate": 1.8536408646298756e-05, "loss": 0.4767, "step": 5211, "task_loss": 0.40886664390563965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4627583622932434, "epoch": 4.41, "learning_rate": 1.8530370728172926e-05, "loss": 0.4265, "step": 5212, "task_loss": 1.441691517829895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3747980296611786, "epoch": 4.41, "learning_rate": 1.8524332810047097e-05, "loss": 0.4517, "step": 5213, "task_loss": 0.6199540495872498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24329079687595367, "epoch": 4.41, "learning_rate": 1.8518294891921267e-05, "loss": 0.3645, "step": 5214, "task_loss": 0.30233871936798096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4385206997394562, "epoch": 4.41, "learning_rate": 1.8512256973795435e-05, "loss": 0.4031, "step": 5215, "task_loss": 0.8836256265640259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30957403779029846, "epoch": 4.41, "learning_rate": 1.8506219055669605e-05, "loss": 0.5083, "step": 5216, "task_loss": 1.136552333831787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5118087530136108, "epoch": 4.41, "learning_rate": 1.8500181137543776e-05, "loss": 0.5646, "step": 5217, "task_loss": 2.026247501373291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4487159252166748, "epoch": 4.41, "learning_rate": 1.8494143219417946e-05, "loss": 0.6313, "step": 5218, "task_loss": 0.32908979058265686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.837873101234436, "epoch": 4.41, "learning_rate": 1.8488105301292117e-05, "loss": 0.6047, "step": 5219, "task_loss": 0.8457399606704712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5687423348426819, "epoch": 4.41, "learning_rate": 1.8482067383166284e-05, "loss": 0.5461, "step": 5220, "task_loss": 0.6929583549499512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3987710475921631, "epoch": 4.41, "learning_rate": 1.8476029465040455e-05, "loss": 0.4148, "step": 5221, "task_loss": 0.9801090955734253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6041147708892822, "epoch": 4.41, "learning_rate": 1.8469991546914625e-05, "loss": 0.4634, "step": 5222, "task_loss": 0.6882253289222717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29924583435058594, "epoch": 4.41, "learning_rate": 1.8463953628788792e-05, "loss": 0.6147, "step": 5223, "task_loss": 1.0725750923156738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4224993586540222, "epoch": 4.42, "learning_rate": 1.8457915710662966e-05, "loss": 0.5872, "step": 5224, "task_loss": 0.6673390865325928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6655415892601013, "epoch": 4.42, "learning_rate": 1.8451877792537133e-05, "loss": 0.505, "step": 5225, "task_loss": 0.2814217805862427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7435063719749451, "epoch": 4.42, "learning_rate": 1.8445839874411304e-05, "loss": 0.5457, "step": 5226, "task_loss": 0.8360223174095154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43006569147109985, "epoch": 4.42, "learning_rate": 1.8439801956285475e-05, "loss": 0.4652, "step": 5227, "task_loss": 0.8208799958229065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5115324854850769, "epoch": 4.42, "learning_rate": 1.8433764038159642e-05, "loss": 0.5135, "step": 5228, "task_loss": 0.6593109965324402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27685004472732544, "epoch": 4.42, "learning_rate": 1.8427726120033816e-05, "loss": 0.395, "step": 5229, "task_loss": 0.2624698877334595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.571931004524231, "epoch": 4.42, "learning_rate": 1.8421688201907983e-05, "loss": 0.5069, "step": 5230, "task_loss": 0.3697534203529358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5739418864250183, "epoch": 4.42, "learning_rate": 1.841565028378215e-05, "loss": 0.722, "step": 5231, "task_loss": 1.02635657787323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2799665331840515, "epoch": 4.42, "learning_rate": 1.8409612365656324e-05, "loss": 0.4128, "step": 5232, "task_loss": 0.3750864565372467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5008310079574585, "epoch": 4.42, "learning_rate": 1.840357444753049e-05, "loss": 0.6347, "step": 5233, "task_loss": 1.2946327924728394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4655146598815918, "epoch": 4.42, "learning_rate": 1.8397536529404662e-05, "loss": 0.4888, "step": 5234, "task_loss": 0.3668901026248932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2846829295158386, "epoch": 4.42, "learning_rate": 1.8391498611278832e-05, "loss": 0.5003, "step": 5235, "task_loss": 1.02667236328125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3310955762863159, "epoch": 4.43, "learning_rate": 1.8385460693153e-05, "loss": 0.5809, "step": 5236, "task_loss": 1.1437448263168335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39515721797943115, "epoch": 4.43, "learning_rate": 1.8379422775027174e-05, "loss": 0.4494, "step": 5237, "task_loss": 0.24364520609378815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46504783630371094, "epoch": 4.43, "learning_rate": 1.837338485690134e-05, "loss": 0.6001, "step": 5238, "task_loss": 0.8616440892219543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.764136552810669, "epoch": 4.43, "learning_rate": 1.836734693877551e-05, "loss": 0.5972, "step": 5239, "task_loss": 0.9303487539291382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.253934383392334, "epoch": 4.43, "learning_rate": 1.8361309020649682e-05, "loss": 0.4531, "step": 5240, "task_loss": 0.698129415512085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5719914436340332, "epoch": 4.43, "learning_rate": 1.835527110252385e-05, "loss": 0.5762, "step": 5241, "task_loss": 0.8353196978569031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7070803642272949, "epoch": 4.43, "learning_rate": 1.834923318439802e-05, "loss": 0.5063, "step": 5242, "task_loss": 0.6174041628837585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5050186514854431, "epoch": 4.43, "learning_rate": 1.834319526627219e-05, "loss": 0.6314, "step": 5243, "task_loss": 0.16646148264408112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5740396976470947, "epoch": 4.43, "learning_rate": 1.833715734814636e-05, "loss": 0.4435, "step": 5244, "task_loss": 1.227225422859192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3799850046634674, "epoch": 4.43, "learning_rate": 1.833111943002053e-05, "loss": 0.5202, "step": 5245, "task_loss": 0.3683261275291443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3593989908695221, "epoch": 4.43, "learning_rate": 1.83250815118947e-05, "loss": 0.4351, "step": 5246, "task_loss": 0.8734204769134521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46893110871315, "epoch": 4.44, "learning_rate": 1.831904359376887e-05, "loss": 0.5102, "step": 5247, "task_loss": 0.46257284283638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48942768573760986, "epoch": 4.44, "learning_rate": 1.831300567564304e-05, "loss": 0.5186, "step": 5248, "task_loss": 0.3115019202232361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5077903270721436, "epoch": 4.44, "learning_rate": 1.830696775751721e-05, "loss": 0.5817, "step": 5249, "task_loss": 0.8587189316749573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7223268747329712, "epoch": 4.44, "learning_rate": 1.8300929839391377e-05, "loss": 0.6004, "step": 5250, "task_loss": 0.47751355171203613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5748900771141052, "epoch": 4.44, "learning_rate": 1.8294891921265548e-05, "loss": 0.4794, "step": 5251, "task_loss": 0.40447402000427246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3141838312149048, "epoch": 4.44, "learning_rate": 1.828885400313972e-05, "loss": 0.4846, "step": 5252, "task_loss": 0.9764127731323242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48671069741249084, "epoch": 4.44, "learning_rate": 1.828281608501389e-05, "loss": 0.4711, "step": 5253, "task_loss": 0.8839144110679626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5850273370742798, "epoch": 4.44, "learning_rate": 1.827677816688806e-05, "loss": 0.6087, "step": 5254, "task_loss": 0.8768379092216492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6182278394699097, "epoch": 4.44, "learning_rate": 1.8270740248762227e-05, "loss": 0.5383, "step": 5255, "task_loss": 1.4835466146469116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22329722344875336, "epoch": 4.44, "learning_rate": 1.8264702330636397e-05, "loss": 0.4866, "step": 5256, "task_loss": 0.32439878582954407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5534935593605042, "epoch": 4.44, "learning_rate": 1.8258664412510568e-05, "loss": 0.5823, "step": 5257, "task_loss": 0.43820783495903015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4369826316833496, "epoch": 4.44, "learning_rate": 1.8252626494384735e-05, "loss": 0.4399, "step": 5258, "task_loss": 0.5779245495796204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5637534260749817, "epoch": 4.45, "learning_rate": 1.824658857625891e-05, "loss": 0.5354, "step": 5259, "task_loss": 0.7149650454521179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.845821738243103, "epoch": 4.45, "learning_rate": 1.8240550658133076e-05, "loss": 0.5278, "step": 5260, "task_loss": 0.46729978919029236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7475347518920898, "epoch": 4.45, "learning_rate": 1.8234512740007244e-05, "loss": 0.5444, "step": 5261, "task_loss": 0.76250821352005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6794567108154297, "epoch": 4.45, "learning_rate": 1.8228474821881417e-05, "loss": 0.6284, "step": 5262, "task_loss": 0.4331459105014801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3166576623916626, "epoch": 4.45, "learning_rate": 1.8222436903755585e-05, "loss": 0.4491, "step": 5263, "task_loss": 0.4386400282382965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30151674151420593, "epoch": 4.45, "learning_rate": 1.8216398985629755e-05, "loss": 0.3812, "step": 5264, "task_loss": 0.2955605685710907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.853123664855957, "epoch": 4.45, "learning_rate": 1.8210361067503926e-05, "loss": 0.5466, "step": 5265, "task_loss": 0.5371663570404053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5875374674797058, "epoch": 4.45, "learning_rate": 1.8204323149378093e-05, "loss": 0.6483, "step": 5266, "task_loss": 0.8730911016464233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4760594964027405, "epoch": 4.45, "learning_rate": 1.8198285231252267e-05, "loss": 0.4716, "step": 5267, "task_loss": 1.7761266231536865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.618675947189331, "epoch": 4.45, "learning_rate": 1.8192247313126434e-05, "loss": 0.4724, "step": 5268, "task_loss": 0.45525598526000977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34543949365615845, "epoch": 4.45, "learning_rate": 1.8186209395000605e-05, "loss": 0.446, "step": 5269, "task_loss": 0.09220592677593231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.598749041557312, "epoch": 4.45, "learning_rate": 1.8180171476874775e-05, "loss": 0.5558, "step": 5270, "task_loss": 1.2085145711898804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3262456953525543, "epoch": 4.46, "learning_rate": 1.8174133558748942e-05, "loss": 0.4087, "step": 5271, "task_loss": 0.5155856013298035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.363564133644104, "epoch": 4.46, "learning_rate": 1.8168095640623113e-05, "loss": 0.4454, "step": 5272, "task_loss": 0.49557217955589294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8112093210220337, "epoch": 4.46, "learning_rate": 1.8162057722497284e-05, "loss": 0.576, "step": 5273, "task_loss": 0.7739707231521606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29326656460762024, "epoch": 4.46, "learning_rate": 1.8156019804371454e-05, "loss": 0.484, "step": 5274, "task_loss": 0.36506983637809753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4936417043209076, "epoch": 4.46, "learning_rate": 1.8149981886245625e-05, "loss": 0.3591, "step": 5275, "task_loss": 0.28767508268356323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6677588820457458, "epoch": 4.46, "learning_rate": 1.8143943968119792e-05, "loss": 0.8006, "step": 5276, "task_loss": 0.9888085126876831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4255192279815674, "epoch": 4.46, "learning_rate": 1.8137906049993963e-05, "loss": 0.4322, "step": 5277, "task_loss": 0.8949341773986816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3338794410228729, "epoch": 4.46, "learning_rate": 1.8131868131868133e-05, "loss": 0.6109, "step": 5278, "task_loss": 0.2139960527420044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4101320803165436, "epoch": 4.46, "learning_rate": 1.8125830213742304e-05, "loss": 0.4643, "step": 5279, "task_loss": 0.5544992089271545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5465366244316101, "epoch": 4.46, "learning_rate": 1.811979229561647e-05, "loss": 0.527, "step": 5280, "task_loss": 1.4394034147262573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.249984011054039, "epoch": 4.46, "learning_rate": 1.811375437749064e-05, "loss": 0.4388, "step": 5281, "task_loss": 0.3796558380126953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6977836489677429, "epoch": 4.46, "learning_rate": 1.8107716459364812e-05, "loss": 0.4727, "step": 5282, "task_loss": 1.1670849323272705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3595432639122009, "epoch": 4.47, "learning_rate": 1.8101678541238983e-05, "loss": 0.3912, "step": 5283, "task_loss": 0.8764082193374634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43445903062820435, "epoch": 4.47, "learning_rate": 1.8095640623113153e-05, "loss": 0.5729, "step": 5284, "task_loss": 0.6454800367355347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3363853096961975, "epoch": 4.47, "learning_rate": 1.808960270498732e-05, "loss": 0.4989, "step": 5285, "task_loss": 0.09658601135015488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7544049024581909, "epoch": 4.47, "learning_rate": 1.808356478686149e-05, "loss": 0.6114, "step": 5286, "task_loss": 0.6382666826248169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31915730237960815, "epoch": 4.47, "learning_rate": 1.807752686873566e-05, "loss": 0.5325, "step": 5287, "task_loss": 0.3222561478614807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39873069524765015, "epoch": 4.47, "learning_rate": 1.807148895060983e-05, "loss": 0.4507, "step": 5288, "task_loss": 0.6492705941200256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3667658567428589, "epoch": 4.47, "learning_rate": 1.8065451032484003e-05, "loss": 0.4031, "step": 5289, "task_loss": 0.4181379973888397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44841498136520386, "epoch": 4.47, "learning_rate": 1.805941311435817e-05, "loss": 0.5069, "step": 5290, "task_loss": 0.25346216559410095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5366259813308716, "epoch": 4.47, "learning_rate": 1.805337519623234e-05, "loss": 0.5799, "step": 5291, "task_loss": 0.6457687020301819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4641053080558777, "epoch": 4.47, "learning_rate": 1.804733727810651e-05, "loss": 0.4812, "step": 5292, "task_loss": 0.3414473831653595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2725278437137604, "epoch": 4.47, "learning_rate": 1.8041299359980678e-05, "loss": 0.5232, "step": 5293, "task_loss": 0.17817308008670807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2974191904067993, "epoch": 4.47, "learning_rate": 1.8035261441854852e-05, "loss": 0.4596, "step": 5294, "task_loss": 0.5277233719825745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7782220840454102, "epoch": 4.48, "learning_rate": 1.802922352372902e-05, "loss": 0.4658, "step": 5295, "task_loss": 0.9573503732681274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3421165347099304, "epoch": 4.48, "learning_rate": 1.8023185605603186e-05, "loss": 0.4512, "step": 5296, "task_loss": 1.1748689413070679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4987561106681824, "epoch": 4.48, "learning_rate": 1.801714768747736e-05, "loss": 0.4432, "step": 5297, "task_loss": 0.5137851238250732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.585494875907898, "epoch": 4.48, "learning_rate": 1.8011109769351528e-05, "loss": 0.6453, "step": 5298, "task_loss": 1.053551435470581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9408829212188721, "epoch": 4.48, "learning_rate": 1.8005071851225698e-05, "loss": 0.6186, "step": 5299, "task_loss": 0.8208414316177368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.268271803855896, "epoch": 4.48, "learning_rate": 1.799903393309987e-05, "loss": 0.404, "step": 5300, "task_loss": 0.4793378710746765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4603778123855591, "epoch": 4.48, "learning_rate": 1.7992996014974036e-05, "loss": 0.5401, "step": 5301, "task_loss": 1.6211289167404175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5672343969345093, "epoch": 4.48, "learning_rate": 1.798695809684821e-05, "loss": 0.5565, "step": 5302, "task_loss": 0.3223145306110382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5029144883155823, "epoch": 4.48, "learning_rate": 1.7980920178722377e-05, "loss": 0.4643, "step": 5303, "task_loss": 1.3947439193725586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8028075098991394, "epoch": 4.48, "learning_rate": 1.7974882260596548e-05, "loss": 0.8083, "step": 5304, "task_loss": 1.7739585638046265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6616266369819641, "epoch": 4.48, "learning_rate": 1.7968844342470718e-05, "loss": 0.6828, "step": 5305, "task_loss": 0.996625542640686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3628544211387634, "epoch": 4.48, "learning_rate": 1.7962806424344885e-05, "loss": 0.4394, "step": 5306, "task_loss": 0.2698367238044739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6246098279953003, "epoch": 4.49, "learning_rate": 1.7956768506219056e-05, "loss": 0.5343, "step": 5307, "task_loss": 0.4993918240070343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6738573312759399, "epoch": 4.49, "learning_rate": 1.7950730588093226e-05, "loss": 0.7414, "step": 5308, "task_loss": 0.8142508268356323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6976404190063477, "epoch": 4.49, "learning_rate": 1.7944692669967397e-05, "loss": 0.4056, "step": 5309, "task_loss": 1.5880228281021118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3389049768447876, "epoch": 4.49, "learning_rate": 1.7938654751841568e-05, "loss": 0.4895, "step": 5310, "task_loss": 1.3826721906661987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3764387369155884, "epoch": 4.49, "learning_rate": 1.7932616833715735e-05, "loss": 0.4961, "step": 5311, "task_loss": 0.6057107448577881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27936801314353943, "epoch": 4.49, "learning_rate": 1.7926578915589905e-05, "loss": 0.4838, "step": 5312, "task_loss": 0.41532421112060547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27112525701522827, "epoch": 4.49, "learning_rate": 1.7920540997464076e-05, "loss": 0.4363, "step": 5313, "task_loss": 0.4502791166305542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6065534353256226, "epoch": 4.49, "learning_rate": 1.7914503079338247e-05, "loss": 0.5053, "step": 5314, "task_loss": 0.835750937461853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4081474542617798, "epoch": 4.49, "learning_rate": 1.7908465161212414e-05, "loss": 0.5001, "step": 5315, "task_loss": 0.1353079080581665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3058404326438904, "epoch": 4.49, "learning_rate": 1.7902427243086584e-05, "loss": 0.4807, "step": 5316, "task_loss": 0.4571177363395691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.383163720369339, "epoch": 4.49, "learning_rate": 1.7896389324960755e-05, "loss": 0.4695, "step": 5317, "task_loss": 0.12312687188386917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7906149625778198, "epoch": 4.5, "learning_rate": 1.7890351406834922e-05, "loss": 0.5679, "step": 5318, "task_loss": 0.3419434130191803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5423780083656311, "epoch": 4.5, "learning_rate": 1.7884313488709096e-05, "loss": 0.572, "step": 5319, "task_loss": 0.8189250826835632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6535266637802124, "epoch": 4.5, "learning_rate": 1.7878275570583263e-05, "loss": 0.5148, "step": 5320, "task_loss": 1.0353374481201172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41892099380493164, "epoch": 4.5, "learning_rate": 1.7872237652457434e-05, "loss": 0.4203, "step": 5321, "task_loss": 0.48897427320480347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3676598370075226, "epoch": 4.5, "learning_rate": 1.7866199734331604e-05, "loss": 0.386, "step": 5322, "task_loss": 0.23943567276000977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5012972354888916, "epoch": 4.5, "learning_rate": 1.786016181620577e-05, "loss": 0.4702, "step": 5323, "task_loss": 0.13851799070835114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39605340361595154, "epoch": 4.5, "learning_rate": 1.7854123898079945e-05, "loss": 0.4018, "step": 5324, "task_loss": 0.16603189706802368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4488663673400879, "epoch": 4.5, "learning_rate": 1.7848085979954113e-05, "loss": 0.5076, "step": 5325, "task_loss": 0.4107595682144165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.250594824552536, "epoch": 4.5, "learning_rate": 1.784204806182828e-05, "loss": 0.6356, "step": 5326, "task_loss": 0.49086838960647583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7477624416351318, "epoch": 4.5, "learning_rate": 1.7836010143702454e-05, "loss": 0.4538, "step": 5327, "task_loss": 0.5824536085128784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5346421003341675, "epoch": 4.5, "learning_rate": 1.782997222557662e-05, "loss": 0.383, "step": 5328, "task_loss": 0.5682054758071899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5592981576919556, "epoch": 4.5, "learning_rate": 1.782393430745079e-05, "loss": 0.4773, "step": 5329, "task_loss": 0.8664562702178955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42156779766082764, "epoch": 4.51, "learning_rate": 1.7817896389324962e-05, "loss": 0.4397, "step": 5330, "task_loss": 0.08832123875617981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5465600490570068, "epoch": 4.51, "learning_rate": 1.781185847119913e-05, "loss": 0.4458, "step": 5331, "task_loss": 1.0452375411987305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5026817917823792, "epoch": 4.51, "learning_rate": 1.7805820553073303e-05, "loss": 0.5313, "step": 5332, "task_loss": 1.053684949874878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4082244038581848, "epoch": 4.51, "learning_rate": 1.779978263494747e-05, "loss": 0.5175, "step": 5333, "task_loss": 0.27448105812072754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8339166045188904, "epoch": 4.51, "learning_rate": 1.779374471682164e-05, "loss": 0.9317, "step": 5334, "task_loss": 1.4815112352371216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.528512716293335, "epoch": 4.51, "learning_rate": 1.778770679869581e-05, "loss": 0.5277, "step": 5335, "task_loss": 1.1600215435028076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5920917987823486, "epoch": 4.51, "learning_rate": 1.778166888056998e-05, "loss": 0.4269, "step": 5336, "task_loss": 0.8102577924728394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49797794222831726, "epoch": 4.51, "learning_rate": 1.777563096244415e-05, "loss": 0.4414, "step": 5337, "task_loss": 0.34728923439979553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6472315192222595, "epoch": 4.51, "learning_rate": 1.776959304431832e-05, "loss": 0.6574, "step": 5338, "task_loss": 1.7491613626480103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5813652873039246, "epoch": 4.51, "learning_rate": 1.776355512619249e-05, "loss": 0.4419, "step": 5339, "task_loss": 0.5841543674468994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3097229599952698, "epoch": 4.51, "learning_rate": 1.775751720806666e-05, "loss": 0.5605, "step": 5340, "task_loss": 0.9844646453857422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4437071979045868, "epoch": 4.51, "learning_rate": 1.7751479289940828e-05, "loss": 0.5001, "step": 5341, "task_loss": 0.5132348537445068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3323069214820862, "epoch": 4.52, "learning_rate": 1.7745441371815e-05, "loss": 0.4748, "step": 5342, "task_loss": 0.05384235829114914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3070586025714874, "epoch": 4.52, "learning_rate": 1.773940345368917e-05, "loss": 0.5171, "step": 5343, "task_loss": 0.13457843661308289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5785703659057617, "epoch": 4.52, "learning_rate": 1.773336553556334e-05, "loss": 0.6213, "step": 5344, "task_loss": 0.82038813829422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34522831439971924, "epoch": 4.52, "learning_rate": 1.7727327617437507e-05, "loss": 0.4157, "step": 5345, "task_loss": 0.3677099049091339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26567530632019043, "epoch": 4.52, "learning_rate": 1.7721289699311678e-05, "loss": 0.5837, "step": 5346, "task_loss": 0.19009901583194733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5682413578033447, "epoch": 4.52, "learning_rate": 1.7715251781185848e-05, "loss": 0.467, "step": 5347, "task_loss": 1.4593312740325928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38993000984191895, "epoch": 4.52, "learning_rate": 1.770921386306002e-05, "loss": 0.5105, "step": 5348, "task_loss": 0.4969681203365326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7158200740814209, "epoch": 4.52, "learning_rate": 1.770317594493419e-05, "loss": 0.6092, "step": 5349, "task_loss": 0.9733288288116455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5068963170051575, "epoch": 4.52, "learning_rate": 1.7697138026808357e-05, "loss": 0.5364, "step": 5350, "task_loss": 0.8530793190002441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39091604948043823, "epoch": 4.52, "learning_rate": 1.7691100108682527e-05, "loss": 0.5089, "step": 5351, "task_loss": 0.4238569438457489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22887153923511505, "epoch": 4.52, "learning_rate": 1.7685062190556698e-05, "loss": 0.4738, "step": 5352, "task_loss": 0.1170913502573967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7772027254104614, "epoch": 4.52, "learning_rate": 1.7679024272430865e-05, "loss": 0.5625, "step": 5353, "task_loss": 1.0978670120239258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.448789119720459, "epoch": 4.53, "learning_rate": 1.767298635430504e-05, "loss": 0.479, "step": 5354, "task_loss": 0.906532347202301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3463450074195862, "epoch": 4.53, "learning_rate": 1.7666948436179206e-05, "loss": 0.4775, "step": 5355, "task_loss": 0.5426546335220337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6102800369262695, "epoch": 4.53, "learning_rate": 1.7660910518053377e-05, "loss": 0.5927, "step": 5356, "task_loss": 1.043286919593811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4140098989009857, "epoch": 4.53, "learning_rate": 1.7654872599927547e-05, "loss": 0.6372, "step": 5357, "task_loss": 0.318030446767807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0827990770339966, "epoch": 4.53, "learning_rate": 1.7648834681801714e-05, "loss": 0.6889, "step": 5358, "task_loss": 1.0528075695037842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0321016311645508, "epoch": 4.53, "learning_rate": 1.764279676367589e-05, "loss": 0.5987, "step": 5359, "task_loss": 1.218461513519287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7496699094772339, "epoch": 4.53, "learning_rate": 1.7636758845550056e-05, "loss": 0.5928, "step": 5360, "task_loss": 1.0113623142242432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44815367460250854, "epoch": 4.53, "learning_rate": 1.7630720927424223e-05, "loss": 0.5364, "step": 5361, "task_loss": 0.4028920829296112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6147949695587158, "epoch": 4.53, "learning_rate": 1.7624683009298397e-05, "loss": 0.6673, "step": 5362, "task_loss": 0.5058900713920593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5667515397071838, "epoch": 4.53, "learning_rate": 1.7618645091172564e-05, "loss": 0.6834, "step": 5363, "task_loss": 1.2718034982681274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5230085253715515, "epoch": 4.53, "learning_rate": 1.7612607173046734e-05, "loss": 0.5252, "step": 5364, "task_loss": 0.8540233969688416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5938000679016113, "epoch": 4.53, "learning_rate": 1.7606569254920905e-05, "loss": 0.5357, "step": 5365, "task_loss": 0.6390989422798157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7102471590042114, "epoch": 4.54, "learning_rate": 1.7600531336795072e-05, "loss": 0.5168, "step": 5366, "task_loss": 0.9288778901100159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44815492630004883, "epoch": 4.54, "learning_rate": 1.7594493418669246e-05, "loss": 0.4686, "step": 5367, "task_loss": 0.4388757050037384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35524606704711914, "epoch": 4.54, "learning_rate": 1.7588455500543413e-05, "loss": 0.3171, "step": 5368, "task_loss": 0.3818599283695221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3984129726886749, "epoch": 4.54, "learning_rate": 1.7582417582417584e-05, "loss": 0.5666, "step": 5369, "task_loss": 0.576820433139801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39762088656425476, "epoch": 4.54, "learning_rate": 1.7576379664291754e-05, "loss": 0.4554, "step": 5370, "task_loss": 0.6187169551849365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5552012920379639, "epoch": 4.54, "learning_rate": 1.757034174616592e-05, "loss": 0.5831, "step": 5371, "task_loss": 0.601713240146637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35238274931907654, "epoch": 4.54, "learning_rate": 1.7564303828040092e-05, "loss": 0.4057, "step": 5372, "task_loss": 0.27822574973106384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48213905096054077, "epoch": 4.54, "learning_rate": 1.7558265909914263e-05, "loss": 0.436, "step": 5373, "task_loss": 1.175932765007019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2747301459312439, "epoch": 4.54, "learning_rate": 1.755222799178843e-05, "loss": 0.4423, "step": 5374, "task_loss": 0.44009676575660706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6568558216094971, "epoch": 4.54, "learning_rate": 1.7546190073662604e-05, "loss": 0.7501, "step": 5375, "task_loss": 0.5169509053230286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3097965121269226, "epoch": 4.54, "learning_rate": 1.754015215553677e-05, "loss": 0.503, "step": 5376, "task_loss": 0.14502836763858795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5641940832138062, "epoch": 4.54, "learning_rate": 1.753411423741094e-05, "loss": 0.445, "step": 5377, "task_loss": 0.3894634246826172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4124983251094818, "epoch": 4.55, "learning_rate": 1.7528076319285112e-05, "loss": 0.3643, "step": 5378, "task_loss": 0.4353872537612915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46088892221450806, "epoch": 4.55, "learning_rate": 1.752203840115928e-05, "loss": 0.6306, "step": 5379, "task_loss": 0.4199133515357971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6478526592254639, "epoch": 4.55, "learning_rate": 1.751600048303345e-05, "loss": 0.538, "step": 5380, "task_loss": 0.6538507342338562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5436025261878967, "epoch": 4.55, "learning_rate": 1.750996256490762e-05, "loss": 0.5435, "step": 5381, "task_loss": 0.6517281532287598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6368356943130493, "epoch": 4.55, "learning_rate": 1.750392464678179e-05, "loss": 0.5425, "step": 5382, "task_loss": 1.0692280530929565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6070291996002197, "epoch": 4.55, "learning_rate": 1.749788672865596e-05, "loss": 0.5608, "step": 5383, "task_loss": 1.1291602849960327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6542998552322388, "epoch": 4.55, "learning_rate": 1.749184881053013e-05, "loss": 0.6098, "step": 5384, "task_loss": 0.4997357428073883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3936842679977417, "epoch": 4.55, "learning_rate": 1.74858108924043e-05, "loss": 0.4516, "step": 5385, "task_loss": 0.8660577535629272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3022080659866333, "epoch": 4.55, "learning_rate": 1.747977297427847e-05, "loss": 0.6486, "step": 5386, "task_loss": 1.1156678199768066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23548036813735962, "epoch": 4.55, "learning_rate": 1.747373505615264e-05, "loss": 0.6573, "step": 5387, "task_loss": 1.430626392364502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4717443883419037, "epoch": 4.55, "learning_rate": 1.7467697138026808e-05, "loss": 0.6065, "step": 5388, "task_loss": 0.7611106038093567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7046002149581909, "epoch": 4.56, "learning_rate": 1.746165921990098e-05, "loss": 0.5374, "step": 5389, "task_loss": 1.1918392181396484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0158601999282837, "epoch": 4.56, "learning_rate": 1.745562130177515e-05, "loss": 0.6643, "step": 5390, "task_loss": 2.2650279998779297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4283023476600647, "epoch": 4.56, "learning_rate": 1.7449583383649316e-05, "loss": 0.4511, "step": 5391, "task_loss": 0.7552892565727234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32387951016426086, "epoch": 4.56, "learning_rate": 1.744354546552349e-05, "loss": 0.441, "step": 5392, "task_loss": 0.5742499232292175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26102110743522644, "epoch": 4.56, "learning_rate": 1.7437507547397657e-05, "loss": 0.5776, "step": 5393, "task_loss": 0.9389200210571289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47874292731285095, "epoch": 4.56, "learning_rate": 1.7431469629271828e-05, "loss": 0.5667, "step": 5394, "task_loss": 0.6723522543907166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5342756509780884, "epoch": 4.56, "learning_rate": 1.7425431711146e-05, "loss": 0.4889, "step": 5395, "task_loss": 0.4787804186344147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2845149338245392, "epoch": 4.56, "learning_rate": 1.7419393793020166e-05, "loss": 0.5459, "step": 5396, "task_loss": 0.39839816093444824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5762616395950317, "epoch": 4.56, "learning_rate": 1.741335587489434e-05, "loss": 0.6429, "step": 5397, "task_loss": 1.0555853843688965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3270818591117859, "epoch": 4.56, "learning_rate": 1.7407317956768507e-05, "loss": 0.4264, "step": 5398, "task_loss": 1.3446632623672485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3082667291164398, "epoch": 4.56, "learning_rate": 1.7401280038642674e-05, "loss": 0.4011, "step": 5399, "task_loss": 0.3053587079048157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.17635950446128845, "epoch": 4.56, "learning_rate": 1.7395242120516848e-05, "loss": 0.5058, "step": 5400, "task_loss": 0.28611624240875244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.567571759223938, "epoch": 4.57, "learning_rate": 1.7389204202391015e-05, "loss": 0.4869, "step": 5401, "task_loss": 0.4277133047580719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6084592938423157, "epoch": 4.57, "learning_rate": 1.7383166284265186e-05, "loss": 0.5504, "step": 5402, "task_loss": 0.44851553440093994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0547802448272705, "epoch": 4.57, "learning_rate": 1.7377128366139356e-05, "loss": 0.6098, "step": 5403, "task_loss": 0.41734224557876587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5102459788322449, "epoch": 4.57, "learning_rate": 1.7371090448013523e-05, "loss": 0.571, "step": 5404, "task_loss": 0.8372790813446045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4160216450691223, "epoch": 4.57, "learning_rate": 1.7365052529887697e-05, "loss": 0.4595, "step": 5405, "task_loss": 0.07762360572814941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3123959004878998, "epoch": 4.57, "learning_rate": 1.7359014611761865e-05, "loss": 0.4323, "step": 5406, "task_loss": 1.3092138767242432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39229950308799744, "epoch": 4.57, "learning_rate": 1.7352976693636035e-05, "loss": 0.4534, "step": 5407, "task_loss": 1.0657851696014404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8867568969726562, "epoch": 4.57, "learning_rate": 1.7346938775510206e-05, "loss": 0.6639, "step": 5408, "task_loss": 1.0215117931365967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49989867210388184, "epoch": 4.57, "learning_rate": 1.7340900857384373e-05, "loss": 0.6464, "step": 5409, "task_loss": 0.24181507527828217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.15571282804012299, "epoch": 4.57, "learning_rate": 1.7334862939258543e-05, "loss": 0.3863, "step": 5410, "task_loss": 0.4521203339099884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2486138790845871, "epoch": 4.57, "learning_rate": 1.7328825021132714e-05, "loss": 0.497, "step": 5411, "task_loss": 0.44419437646865845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48378828167915344, "epoch": 4.57, "learning_rate": 1.7322787103006885e-05, "loss": 0.502, "step": 5412, "task_loss": 1.1538996696472168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.272225946187973, "epoch": 4.58, "learning_rate": 1.7316749184881055e-05, "loss": 0.4748, "step": 5413, "task_loss": 0.34107521176338196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5830373764038086, "epoch": 4.58, "learning_rate": 1.7310711266755222e-05, "loss": 0.509, "step": 5414, "task_loss": 0.4812740385532379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4598248600959778, "epoch": 4.58, "learning_rate": 1.7304673348629393e-05, "loss": 0.3967, "step": 5415, "task_loss": 0.45853331685066223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21499529480934143, "epoch": 4.58, "learning_rate": 1.7298635430503563e-05, "loss": 0.3623, "step": 5416, "task_loss": 0.28228867053985596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6637423634529114, "epoch": 4.58, "learning_rate": 1.7292597512377734e-05, "loss": 0.6109, "step": 5417, "task_loss": 1.0034449100494385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35611775517463684, "epoch": 4.58, "learning_rate": 1.72865595942519e-05, "loss": 0.5057, "step": 5418, "task_loss": 0.7666125297546387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4196309447288513, "epoch": 4.58, "learning_rate": 1.7280521676126072e-05, "loss": 0.431, "step": 5419, "task_loss": 0.6954057812690735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5230875015258789, "epoch": 4.58, "learning_rate": 1.7274483758000242e-05, "loss": 0.452, "step": 5420, "task_loss": 0.4380897879600525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7252964973449707, "epoch": 4.58, "learning_rate": 1.7268445839874413e-05, "loss": 0.7035, "step": 5421, "task_loss": 1.138519525527954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4954253137111664, "epoch": 4.58, "learning_rate": 1.7262407921748584e-05, "loss": 0.5144, "step": 5422, "task_loss": 0.27527084946632385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1861920803785324, "epoch": 4.58, "learning_rate": 1.725637000362275e-05, "loss": 0.3423, "step": 5423, "task_loss": 0.5326087474822998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4058866798877716, "epoch": 4.58, "learning_rate": 1.725033208549692e-05, "loss": 0.5362, "step": 5424, "task_loss": 0.2310435026884079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33664992451667786, "epoch": 4.59, "learning_rate": 1.7244294167371092e-05, "loss": 0.489, "step": 5425, "task_loss": 0.37981417775154114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3225569725036621, "epoch": 4.59, "learning_rate": 1.723825624924526e-05, "loss": 0.4404, "step": 5426, "task_loss": 0.11073686182498932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5739659070968628, "epoch": 4.59, "learning_rate": 1.7232218331119433e-05, "loss": 0.484, "step": 5427, "task_loss": 1.5879143476486206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7419968247413635, "epoch": 4.59, "learning_rate": 1.72261804129936e-05, "loss": 0.4549, "step": 5428, "task_loss": 0.17074747383594513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5476052761077881, "epoch": 4.59, "learning_rate": 1.722014249486777e-05, "loss": 0.5176, "step": 5429, "task_loss": 0.5691224336624146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7450308203697205, "epoch": 4.59, "learning_rate": 1.721410457674194e-05, "loss": 0.4774, "step": 5430, "task_loss": 0.827069103717804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40565258264541626, "epoch": 4.59, "learning_rate": 1.720806665861611e-05, "loss": 0.3981, "step": 5431, "task_loss": 0.5419377088546753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5382448434829712, "epoch": 4.59, "learning_rate": 1.7202028740490282e-05, "loss": 0.4304, "step": 5432, "task_loss": 0.5731014609336853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2793627679347992, "epoch": 4.59, "learning_rate": 1.719599082236445e-05, "loss": 0.5521, "step": 5433, "task_loss": 0.42362678050994873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4054490327835083, "epoch": 4.59, "learning_rate": 1.7189952904238617e-05, "loss": 0.508, "step": 5434, "task_loss": 0.6656392812728882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5713086128234863, "epoch": 4.59, "learning_rate": 1.718391498611279e-05, "loss": 0.5252, "step": 5435, "task_loss": 0.8863192200660706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5195351839065552, "epoch": 4.59, "learning_rate": 1.7177877067986958e-05, "loss": 0.4355, "step": 5436, "task_loss": 1.3757165670394897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5164188146591187, "epoch": 4.6, "learning_rate": 1.717183914986113e-05, "loss": 0.4118, "step": 5437, "task_loss": 1.4070080518722534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7834854125976562, "epoch": 4.6, "learning_rate": 1.71658012317353e-05, "loss": 0.4677, "step": 5438, "task_loss": 0.6979140043258667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21009454131126404, "epoch": 4.6, "learning_rate": 1.7159763313609466e-05, "loss": 0.3682, "step": 5439, "task_loss": 0.581680953502655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7120217680931091, "epoch": 4.6, "learning_rate": 1.715372539548364e-05, "loss": 0.7316, "step": 5440, "task_loss": 0.6432896256446838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7193764448165894, "epoch": 4.6, "learning_rate": 1.7147687477357807e-05, "loss": 0.5476, "step": 5441, "task_loss": 1.3694767951965332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38746583461761475, "epoch": 4.6, "learning_rate": 1.7141649559231978e-05, "loss": 0.4159, "step": 5442, "task_loss": 0.6383300423622131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.20137155055999756, "epoch": 4.6, "learning_rate": 1.713561164110615e-05, "loss": 0.3626, "step": 5443, "task_loss": 0.11660199612379074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7759224772453308, "epoch": 4.6, "learning_rate": 1.7129573722980316e-05, "loss": 0.6706, "step": 5444, "task_loss": 0.7136790156364441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3924662172794342, "epoch": 4.6, "learning_rate": 1.7123535804854486e-05, "loss": 0.5078, "step": 5445, "task_loss": 0.24098271131515503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.872663676738739, "epoch": 4.6, "learning_rate": 1.7117497886728657e-05, "loss": 0.6756, "step": 5446, "task_loss": 0.8768436312675476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5718363523483276, "epoch": 4.6, "learning_rate": 1.7111459968602827e-05, "loss": 0.5372, "step": 5447, "task_loss": 0.46772676706314087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3965407907962799, "epoch": 4.6, "learning_rate": 1.7105422050476995e-05, "loss": 0.402, "step": 5448, "task_loss": 0.4877094626426697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5396556258201599, "epoch": 4.61, "learning_rate": 1.7099384132351165e-05, "loss": 0.4931, "step": 5449, "task_loss": 0.8469643592834473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38385307788848877, "epoch": 4.61, "learning_rate": 1.7093346214225336e-05, "loss": 0.4464, "step": 5450, "task_loss": 0.3472599387168884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4008443355560303, "epoch": 4.61, "learning_rate": 1.7087308296099506e-05, "loss": 0.4277, "step": 5451, "task_loss": 0.6118302345275879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37542954087257385, "epoch": 4.61, "learning_rate": 1.7081270377973677e-05, "loss": 0.4545, "step": 5452, "task_loss": 0.6742677688598633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8103146553039551, "epoch": 4.61, "learning_rate": 1.7075232459847844e-05, "loss": 0.7134, "step": 5453, "task_loss": 0.8821404576301575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5318930149078369, "epoch": 4.61, "learning_rate": 1.7069194541722015e-05, "loss": 0.5496, "step": 5454, "task_loss": 1.2930275201797485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4825977683067322, "epoch": 4.61, "learning_rate": 1.7063156623596185e-05, "loss": 0.6599, "step": 5455, "task_loss": 0.7124821543693542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.19768565893173218, "epoch": 4.61, "learning_rate": 1.7057118705470352e-05, "loss": 0.5225, "step": 5456, "task_loss": 0.0757354348897934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5554444193840027, "epoch": 4.61, "learning_rate": 1.7051080787344526e-05, "loss": 0.5496, "step": 5457, "task_loss": 1.1591628789901733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2524852156639099, "epoch": 4.61, "learning_rate": 1.7045042869218694e-05, "loss": 0.4676, "step": 5458, "task_loss": 0.6055819988250732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5855680108070374, "epoch": 4.61, "learning_rate": 1.7039004951092864e-05, "loss": 0.6348, "step": 5459, "task_loss": 1.4980745315551758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2372620552778244, "epoch": 4.61, "learning_rate": 1.7032967032967035e-05, "loss": 0.4694, "step": 5460, "task_loss": 0.34745949506759644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2282392829656601, "epoch": 4.62, "learning_rate": 1.7026929114841202e-05, "loss": 0.4466, "step": 5461, "task_loss": 0.17458760738372803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36137285828590393, "epoch": 4.62, "learning_rate": 1.7020891196715376e-05, "loss": 0.4898, "step": 5462, "task_loss": 1.096798300743103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8885866403579712, "epoch": 4.62, "learning_rate": 1.7014853278589543e-05, "loss": 0.535, "step": 5463, "task_loss": 1.8363418579101562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4085941016674042, "epoch": 4.62, "learning_rate": 1.700881536046371e-05, "loss": 0.4095, "step": 5464, "task_loss": 0.6862176060676575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2316611111164093, "epoch": 4.62, "learning_rate": 1.7002777442337884e-05, "loss": 0.4696, "step": 5465, "task_loss": 0.4384218454360962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22684870660305023, "epoch": 4.62, "learning_rate": 1.699673952421205e-05, "loss": 0.4614, "step": 5466, "task_loss": 0.6826119422912598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48724451661109924, "epoch": 4.62, "learning_rate": 1.6990701606086222e-05, "loss": 0.3695, "step": 5467, "task_loss": 0.6927094459533691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6074528694152832, "epoch": 4.62, "learning_rate": 1.6984663687960393e-05, "loss": 0.5865, "step": 5468, "task_loss": 0.263081431388855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31559231877326965, "epoch": 4.62, "learning_rate": 1.697862576983456e-05, "loss": 0.4768, "step": 5469, "task_loss": 0.1779908388853073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.541858434677124, "epoch": 4.62, "learning_rate": 1.6972587851708734e-05, "loss": 0.4705, "step": 5470, "task_loss": 0.8442181348800659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4459349811077118, "epoch": 4.62, "learning_rate": 1.69665499335829e-05, "loss": 0.4104, "step": 5471, "task_loss": 0.16329926252365112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45675379037857056, "epoch": 4.63, "learning_rate": 1.696051201545707e-05, "loss": 0.4997, "step": 5472, "task_loss": 0.768002450466156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7953561544418335, "epoch": 4.63, "learning_rate": 1.6954474097331242e-05, "loss": 0.564, "step": 5473, "task_loss": 0.651598334312439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5013059973716736, "epoch": 4.63, "learning_rate": 1.694843617920541e-05, "loss": 0.515, "step": 5474, "task_loss": 0.2683398127555847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41923826932907104, "epoch": 4.63, "learning_rate": 1.694239826107958e-05, "loss": 0.4483, "step": 5475, "task_loss": 0.33310467004776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3472307324409485, "epoch": 4.63, "learning_rate": 1.693636034295375e-05, "loss": 0.6053, "step": 5476, "task_loss": 1.075961709022522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40587639808654785, "epoch": 4.63, "learning_rate": 1.693032242482792e-05, "loss": 0.5657, "step": 5477, "task_loss": 1.0487520694732666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4962114095687866, "epoch": 4.63, "learning_rate": 1.692428450670209e-05, "loss": 0.5145, "step": 5478, "task_loss": 0.8609305620193481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2868644893169403, "epoch": 4.63, "learning_rate": 1.691824658857626e-05, "loss": 0.3927, "step": 5479, "task_loss": 0.16665948927402496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8361634612083435, "epoch": 4.63, "learning_rate": 1.691220867045043e-05, "loss": 0.6755, "step": 5480, "task_loss": 0.9524366855621338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4137452244758606, "epoch": 4.63, "learning_rate": 1.69061707523246e-05, "loss": 0.5854, "step": 5481, "task_loss": 0.4507189393043518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5147236585617065, "epoch": 4.63, "learning_rate": 1.690013283419877e-05, "loss": 0.4341, "step": 5482, "task_loss": 0.4382483661174774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4952126145362854, "epoch": 4.63, "learning_rate": 1.6894094916072938e-05, "loss": 0.3705, "step": 5483, "task_loss": 1.4983782768249512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24983911216259003, "epoch": 4.64, "learning_rate": 1.6888056997947108e-05, "loss": 0.3442, "step": 5484, "task_loss": 0.6257839798927307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6691804528236389, "epoch": 4.64, "learning_rate": 1.688201907982128e-05, "loss": 0.6041, "step": 5485, "task_loss": 0.7354838252067566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46053797006607056, "epoch": 4.64, "learning_rate": 1.687598116169545e-05, "loss": 0.4503, "step": 5486, "task_loss": 0.4965379238128662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6130886077880859, "epoch": 4.64, "learning_rate": 1.686994324356962e-05, "loss": 0.5423, "step": 5487, "task_loss": 0.485504150390625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4289071261882782, "epoch": 4.64, "learning_rate": 1.6863905325443787e-05, "loss": 0.5484, "step": 5488, "task_loss": 0.6775035262107849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4593380093574524, "epoch": 4.64, "learning_rate": 1.6857867407317958e-05, "loss": 0.4377, "step": 5489, "task_loss": 0.4332638084888458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8059954643249512, "epoch": 4.64, "learning_rate": 1.6851829489192128e-05, "loss": 0.4635, "step": 5490, "task_loss": 1.5076130628585815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4752492308616638, "epoch": 4.64, "learning_rate": 1.6845791571066295e-05, "loss": 0.5055, "step": 5491, "task_loss": 0.7368783354759216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4119701385498047, "epoch": 4.64, "learning_rate": 1.683975365294047e-05, "loss": 0.5459, "step": 5492, "task_loss": 0.3374151289463043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6387439370155334, "epoch": 4.64, "learning_rate": 1.6833715734814636e-05, "loss": 0.4198, "step": 5493, "task_loss": 0.5488913059234619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5181170701980591, "epoch": 4.64, "learning_rate": 1.6827677816688807e-05, "loss": 0.5759, "step": 5494, "task_loss": 0.5951091647148132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4909436106681824, "epoch": 4.64, "learning_rate": 1.6821639898562978e-05, "loss": 0.51, "step": 5495, "task_loss": 0.5649423599243164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4099913239479065, "epoch": 4.65, "learning_rate": 1.6815601980437145e-05, "loss": 0.4278, "step": 5496, "task_loss": 1.1192233562469482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3774700164794922, "epoch": 4.65, "learning_rate": 1.680956406231132e-05, "loss": 0.5683, "step": 5497, "task_loss": 1.0754592418670654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3625316023826599, "epoch": 4.65, "learning_rate": 1.6803526144185486e-05, "loss": 0.3739, "step": 5498, "task_loss": 1.0995348691940308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36881405115127563, "epoch": 4.65, "learning_rate": 1.6797488226059653e-05, "loss": 0.3073, "step": 5499, "task_loss": 0.2381659597158432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3133361041545868, "epoch": 4.65, "learning_rate": 1.6791450307933827e-05, "loss": 0.444, "step": 5500, "task_loss": 0.5702794790267944 }, { "epoch": 4.65, "eval_accuracy": 0.9043564356435644, "eval_loss": 0.32675039768218994, "eval_runtime": 229.206, "eval_samples_per_second": 110.163, "eval_steps_per_second": 0.864, "step": 5500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4147684574127197, "epoch": 4.65, "learning_rate": 1.6785412389807994e-05, "loss": 0.4852, "step": 5501, "task_loss": 0.3123387098312378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41534972190856934, "epoch": 4.65, "learning_rate": 1.6779374471682165e-05, "loss": 0.4718, "step": 5502, "task_loss": 0.49083971977233887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33565112948417664, "epoch": 4.65, "learning_rate": 1.6773336553556335e-05, "loss": 0.4887, "step": 5503, "task_loss": 1.2359427213668823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32279524207115173, "epoch": 4.65, "learning_rate": 1.6767298635430503e-05, "loss": 0.4697, "step": 5504, "task_loss": 0.3989332318305969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6157023310661316, "epoch": 4.65, "learning_rate": 1.6761260717304677e-05, "loss": 0.6247, "step": 5505, "task_loss": 0.37597203254699707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5993354320526123, "epoch": 4.65, "learning_rate": 1.6755222799178844e-05, "loss": 0.4321, "step": 5506, "task_loss": 0.4510357975959778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37996605038642883, "epoch": 4.65, "learning_rate": 1.6749184881053014e-05, "loss": 0.4441, "step": 5507, "task_loss": 0.8775479793548584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3377774655818939, "epoch": 4.66, "learning_rate": 1.6743146962927185e-05, "loss": 0.4732, "step": 5508, "task_loss": 1.3134193420410156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49325382709503174, "epoch": 4.66, "learning_rate": 1.6737109044801352e-05, "loss": 0.524, "step": 5509, "task_loss": 0.6207937598228455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3179996609687805, "epoch": 4.66, "learning_rate": 1.6731071126675523e-05, "loss": 0.4563, "step": 5510, "task_loss": 0.34824472665786743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33780375123023987, "epoch": 4.66, "learning_rate": 1.6725033208549693e-05, "loss": 0.5856, "step": 5511, "task_loss": 0.4170486330986023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.289669394493103, "epoch": 4.66, "learning_rate": 1.6718995290423864e-05, "loss": 0.5084, "step": 5512, "task_loss": 0.02646147459745407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7914003729820251, "epoch": 4.66, "learning_rate": 1.671295737229803e-05, "loss": 0.6016, "step": 5513, "task_loss": 0.6610104441642761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2872586250305176, "epoch": 4.66, "learning_rate": 1.67069194541722e-05, "loss": 0.4729, "step": 5514, "task_loss": 0.3066456913948059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2636492848396301, "epoch": 4.66, "learning_rate": 1.6700881536046372e-05, "loss": 0.5339, "step": 5515, "task_loss": 0.3916909694671631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23536480963230133, "epoch": 4.66, "learning_rate": 1.6694843617920543e-05, "loss": 0.3685, "step": 5516, "task_loss": 0.55550217628479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41108274459838867, "epoch": 4.66, "learning_rate": 1.6688805699794713e-05, "loss": 0.5015, "step": 5517, "task_loss": 0.629697322845459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3850550651550293, "epoch": 4.66, "learning_rate": 1.668276778166888e-05, "loss": 0.3613, "step": 5518, "task_loss": 0.8391644954681396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.523095965385437, "epoch": 4.66, "learning_rate": 1.667672986354305e-05, "loss": 0.6693, "step": 5519, "task_loss": 0.818298876285553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2860914468765259, "epoch": 4.67, "learning_rate": 1.667069194541722e-05, "loss": 0.4147, "step": 5520, "task_loss": 0.8251075744628906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22926054894924164, "epoch": 4.67, "learning_rate": 1.666465402729139e-05, "loss": 0.4231, "step": 5521, "task_loss": 0.0574369803071022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6622275114059448, "epoch": 4.67, "learning_rate": 1.6658616109165563e-05, "loss": 0.6638, "step": 5522, "task_loss": 0.7030304074287415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5450395941734314, "epoch": 4.67, "learning_rate": 1.665257819103973e-05, "loss": 0.5008, "step": 5523, "task_loss": 0.48319536447525024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9016443490982056, "epoch": 4.67, "learning_rate": 1.66465402729139e-05, "loss": 0.7457, "step": 5524, "task_loss": 0.3283531665802002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35418522357940674, "epoch": 4.67, "learning_rate": 1.664050235478807e-05, "loss": 0.5549, "step": 5525, "task_loss": 0.12774589657783508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45832329988479614, "epoch": 4.67, "learning_rate": 1.6634464436662238e-05, "loss": 0.4973, "step": 5526, "task_loss": 0.2509722113609314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6581006050109863, "epoch": 4.67, "learning_rate": 1.6628426518536412e-05, "loss": 0.4521, "step": 5527, "task_loss": 1.7563774585723877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36212971806526184, "epoch": 4.67, "learning_rate": 1.662238860041058e-05, "loss": 0.4199, "step": 5528, "task_loss": 0.7862239480018616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.436812162399292, "epoch": 4.67, "learning_rate": 1.6616350682284747e-05, "loss": 0.6177, "step": 5529, "task_loss": 0.20202237367630005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4297260046005249, "epoch": 4.67, "learning_rate": 1.661031276415892e-05, "loss": 0.5178, "step": 5530, "task_loss": 0.6849742531776428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.719603955745697, "epoch": 4.67, "learning_rate": 1.6604274846033088e-05, "loss": 0.5958, "step": 5531, "task_loss": 1.1233752965927124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5176264047622681, "epoch": 4.68, "learning_rate": 1.6598236927907258e-05, "loss": 0.4948, "step": 5532, "task_loss": 0.6687624454498291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6180204153060913, "epoch": 4.68, "learning_rate": 1.659219900978143e-05, "loss": 0.4425, "step": 5533, "task_loss": 0.12002148479223251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4559169113636017, "epoch": 4.68, "learning_rate": 1.6586161091655596e-05, "loss": 0.4809, "step": 5534, "task_loss": 0.3982764482498169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.17186826467514038, "epoch": 4.68, "learning_rate": 1.658012317352977e-05, "loss": 0.5009, "step": 5535, "task_loss": 0.5122766494750977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40602174401283264, "epoch": 4.68, "learning_rate": 1.6574085255403937e-05, "loss": 0.5229, "step": 5536, "task_loss": 1.0928808450698853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7302366495132446, "epoch": 4.68, "learning_rate": 1.6568047337278108e-05, "loss": 0.4838, "step": 5537, "task_loss": 1.037413239479065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3035411238670349, "epoch": 4.68, "learning_rate": 1.6562009419152278e-05, "loss": 0.4952, "step": 5538, "task_loss": 0.5348610877990723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4587266445159912, "epoch": 4.68, "learning_rate": 1.6555971501026445e-05, "loss": 0.4972, "step": 5539, "task_loss": 1.2847027778625488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4258194863796234, "epoch": 4.68, "learning_rate": 1.6549933582900616e-05, "loss": 0.623, "step": 5540, "task_loss": 0.5269262194633484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4110688865184784, "epoch": 4.68, "learning_rate": 1.6543895664774787e-05, "loss": 0.3561, "step": 5541, "task_loss": 0.4787455201148987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23311346769332886, "epoch": 4.68, "learning_rate": 1.6537857746648957e-05, "loss": 0.4193, "step": 5542, "task_loss": 1.0165972709655762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2708989381790161, "epoch": 4.69, "learning_rate": 1.6531819828523128e-05, "loss": 0.4763, "step": 5543, "task_loss": 0.7854048013687134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3676983118057251, "epoch": 4.69, "learning_rate": 1.6525781910397295e-05, "loss": 0.4102, "step": 5544, "task_loss": 0.4296169877052307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3464120924472809, "epoch": 4.69, "learning_rate": 1.6519743992271465e-05, "loss": 0.4025, "step": 5545, "task_loss": 0.6425986886024475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5495314598083496, "epoch": 4.69, "learning_rate": 1.6513706074145636e-05, "loss": 0.4297, "step": 5546, "task_loss": 0.7150076031684875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36066633462905884, "epoch": 4.69, "learning_rate": 1.6507668156019807e-05, "loss": 0.3932, "step": 5547, "task_loss": 0.5827043652534485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3435899615287781, "epoch": 4.69, "learning_rate": 1.6501630237893974e-05, "loss": 0.5748, "step": 5548, "task_loss": 0.7936814427375793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.317018061876297, "epoch": 4.69, "learning_rate": 1.6495592319768144e-05, "loss": 0.3345, "step": 5549, "task_loss": 0.7015313506126404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8925701379776001, "epoch": 4.69, "learning_rate": 1.6489554401642315e-05, "loss": 0.5696, "step": 5550, "task_loss": 0.6457761526107788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34813034534454346, "epoch": 4.69, "learning_rate": 1.6483516483516486e-05, "loss": 0.4001, "step": 5551, "task_loss": 0.7111232280731201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47973620891571045, "epoch": 4.69, "learning_rate": 1.6477478565390653e-05, "loss": 0.5676, "step": 5552, "task_loss": 0.7966150641441345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5473794937133789, "epoch": 4.69, "learning_rate": 1.6471440647264823e-05, "loss": 0.5306, "step": 5553, "task_loss": 0.954845130443573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39358165860176086, "epoch": 4.69, "learning_rate": 1.6465402729138994e-05, "loss": 0.4211, "step": 5554, "task_loss": 0.22330710291862488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37325403094291687, "epoch": 4.7, "learning_rate": 1.6459364811013164e-05, "loss": 0.4607, "step": 5555, "task_loss": 0.5965156555175781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5200329422950745, "epoch": 4.7, "learning_rate": 1.645332689288733e-05, "loss": 0.5767, "step": 5556, "task_loss": 0.55033940076828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5287368297576904, "epoch": 4.7, "learning_rate": 1.6447288974761502e-05, "loss": 0.5875, "step": 5557, "task_loss": 0.3366183638572693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5582667589187622, "epoch": 4.7, "learning_rate": 1.6441251056635673e-05, "loss": 0.5322, "step": 5558, "task_loss": 0.8144546747207642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5114145278930664, "epoch": 4.7, "learning_rate": 1.6435213138509843e-05, "loss": 0.4241, "step": 5559, "task_loss": 0.6799103617668152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48670458793640137, "epoch": 4.7, "learning_rate": 1.6429175220384014e-05, "loss": 0.4541, "step": 5560, "task_loss": 0.7209588885307312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3843807876110077, "epoch": 4.7, "learning_rate": 1.642313730225818e-05, "loss": 0.4801, "step": 5561, "task_loss": 0.5228511095046997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7493880987167358, "epoch": 4.7, "learning_rate": 1.641709938413235e-05, "loss": 0.6059, "step": 5562, "task_loss": 0.8271488547325134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43549206852912903, "epoch": 4.7, "learning_rate": 1.6411061466006522e-05, "loss": 0.4738, "step": 5563, "task_loss": 0.49297448992729187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5315837264060974, "epoch": 4.7, "learning_rate": 1.640502354788069e-05, "loss": 0.635, "step": 5564, "task_loss": 0.13145673274993896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32037752866744995, "epoch": 4.7, "learning_rate": 1.6398985629754863e-05, "loss": 0.5194, "step": 5565, "task_loss": 0.5963384509086609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39566391706466675, "epoch": 4.7, "learning_rate": 1.639294771162903e-05, "loss": 0.5598, "step": 5566, "task_loss": 0.6603891253471375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46277955174446106, "epoch": 4.71, "learning_rate": 1.63869097935032e-05, "loss": 0.4827, "step": 5567, "task_loss": 0.6127215027809143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6226282119750977, "epoch": 4.71, "learning_rate": 1.638087187537737e-05, "loss": 0.6866, "step": 5568, "task_loss": 0.1706911027431488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4275314509868622, "epoch": 4.71, "learning_rate": 1.637483395725154e-05, "loss": 0.5025, "step": 5569, "task_loss": 0.3638128638267517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38007891178131104, "epoch": 4.71, "learning_rate": 1.636879603912571e-05, "loss": 0.3946, "step": 5570, "task_loss": 0.5417359471321106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8141539096832275, "epoch": 4.71, "learning_rate": 1.636275812099988e-05, "loss": 0.6436, "step": 5571, "task_loss": 0.6862582564353943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1129305362701416, "epoch": 4.71, "learning_rate": 1.6356720202874047e-05, "loss": 0.6323, "step": 5572, "task_loss": 0.851077139377594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5597401261329651, "epoch": 4.71, "learning_rate": 1.635068228474822e-05, "loss": 0.4722, "step": 5573, "task_loss": 0.8088442087173462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4983350336551666, "epoch": 4.71, "learning_rate": 1.634464436662239e-05, "loss": 0.4478, "step": 5574, "task_loss": 0.627778947353363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40715292096138, "epoch": 4.71, "learning_rate": 1.633860644849656e-05, "loss": 0.5729, "step": 5575, "task_loss": 0.929154634475708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5212172865867615, "epoch": 4.71, "learning_rate": 1.633256853037073e-05, "loss": 0.4585, "step": 5576, "task_loss": 0.14736983180046082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3323495388031006, "epoch": 4.71, "learning_rate": 1.6326530612244897e-05, "loss": 0.4718, "step": 5577, "task_loss": 0.6321436166763306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5940913558006287, "epoch": 4.71, "learning_rate": 1.6320492694119067e-05, "loss": 0.4797, "step": 5578, "task_loss": 0.5767788887023926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.536165177822113, "epoch": 4.72, "learning_rate": 1.6314454775993238e-05, "loss": 0.564, "step": 5579, "task_loss": 1.1479891538619995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5259643197059631, "epoch": 4.72, "learning_rate": 1.630841685786741e-05, "loss": 0.5189, "step": 5580, "task_loss": 0.16334594786167145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31547582149505615, "epoch": 4.72, "learning_rate": 1.630237893974158e-05, "loss": 0.5224, "step": 5581, "task_loss": 0.10409452766180038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32807981967926025, "epoch": 4.72, "learning_rate": 1.6296341021615746e-05, "loss": 0.4363, "step": 5582, "task_loss": 0.48191022872924805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6458097696304321, "epoch": 4.72, "learning_rate": 1.6290303103489917e-05, "loss": 0.715, "step": 5583, "task_loss": 0.6684753894805908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5509357452392578, "epoch": 4.72, "learning_rate": 1.6284265185364087e-05, "loss": 0.5419, "step": 5584, "task_loss": 1.3463982343673706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6108837127685547, "epoch": 4.72, "learning_rate": 1.6278227267238258e-05, "loss": 0.4388, "step": 5585, "task_loss": 1.5121443271636963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42857879400253296, "epoch": 4.72, "learning_rate": 1.6272189349112425e-05, "loss": 0.4971, "step": 5586, "task_loss": 0.5821583867073059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.19345209002494812, "epoch": 4.72, "learning_rate": 1.6266151430986596e-05, "loss": 0.4361, "step": 5587, "task_loss": 0.31440532207489014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43258535861968994, "epoch": 4.72, "learning_rate": 1.6260113512860766e-05, "loss": 0.4752, "step": 5588, "task_loss": 1.4167523384094238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5530897378921509, "epoch": 4.72, "learning_rate": 1.6254075594734937e-05, "loss": 0.4815, "step": 5589, "task_loss": 0.5023097395896912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4738820791244507, "epoch": 4.72, "learning_rate": 1.6248037676609107e-05, "loss": 0.4031, "step": 5590, "task_loss": 0.3816971182823181 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39053091406822205, "epoch": 4.73, "learning_rate": 1.6241999758483274e-05, "loss": 0.5554, "step": 5591, "task_loss": 1.148266077041626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4704574942588806, "epoch": 4.73, "learning_rate": 1.6235961840357445e-05, "loss": 0.465, "step": 5592, "task_loss": 0.7403675317764282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5168968439102173, "epoch": 4.73, "learning_rate": 1.6229923922231616e-05, "loss": 0.4938, "step": 5593, "task_loss": 0.9816514849662781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5262459516525269, "epoch": 4.73, "learning_rate": 1.6223886004105783e-05, "loss": 0.6371, "step": 5594, "task_loss": 0.9309276342391968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5352354049682617, "epoch": 4.73, "learning_rate": 1.6217848085979957e-05, "loss": 0.6131, "step": 5595, "task_loss": 0.4466126263141632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.284623384475708, "epoch": 4.73, "learning_rate": 1.6211810167854124e-05, "loss": 0.4867, "step": 5596, "task_loss": 0.44486290216445923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4374670386314392, "epoch": 4.73, "learning_rate": 1.6205772249728295e-05, "loss": 0.4941, "step": 5597, "task_loss": 1.2740380764007568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46359074115753174, "epoch": 4.73, "learning_rate": 1.6199734331602465e-05, "loss": 0.4302, "step": 5598, "task_loss": 0.6965603828430176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4394988417625427, "epoch": 4.73, "learning_rate": 1.6193696413476632e-05, "loss": 0.429, "step": 5599, "task_loss": 0.44126826524734497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6105198264122009, "epoch": 4.73, "learning_rate": 1.6187658495350806e-05, "loss": 0.5792, "step": 5600, "task_loss": 0.6733864545822144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5707597136497498, "epoch": 4.73, "learning_rate": 1.6181620577224973e-05, "loss": 0.4944, "step": 5601, "task_loss": 0.34913721680641174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4054502546787262, "epoch": 4.73, "learning_rate": 1.617558265909914e-05, "loss": 0.5655, "step": 5602, "task_loss": 1.294619083404541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5315719246864319, "epoch": 4.74, "learning_rate": 1.6169544740973315e-05, "loss": 0.437, "step": 5603, "task_loss": 0.06435100734233856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3744083642959595, "epoch": 4.74, "learning_rate": 1.6163506822847482e-05, "loss": 0.4109, "step": 5604, "task_loss": 0.4203505516052246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3453739881515503, "epoch": 4.74, "learning_rate": 1.6157468904721652e-05, "loss": 0.5286, "step": 5605, "task_loss": 1.4459477663040161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3642445504665375, "epoch": 4.74, "learning_rate": 1.6151430986595823e-05, "loss": 0.5754, "step": 5606, "task_loss": 0.5929403305053711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7598825693130493, "epoch": 4.74, "learning_rate": 1.614539306846999e-05, "loss": 0.5458, "step": 5607, "task_loss": 0.3928650915622711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7772847414016724, "epoch": 4.74, "learning_rate": 1.6139355150344164e-05, "loss": 0.5578, "step": 5608, "task_loss": 1.1732648611068726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5160415768623352, "epoch": 4.74, "learning_rate": 1.613331723221833e-05, "loss": 0.5886, "step": 5609, "task_loss": 0.4166787266731262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7778906226158142, "epoch": 4.74, "learning_rate": 1.6127279314092502e-05, "loss": 0.6079, "step": 5610, "task_loss": 1.2076853513717651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.592951774597168, "epoch": 4.74, "learning_rate": 1.6121241395966672e-05, "loss": 0.5109, "step": 5611, "task_loss": 0.8812345862388611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5346471071243286, "epoch": 4.74, "learning_rate": 1.611520347784084e-05, "loss": 0.5163, "step": 5612, "task_loss": 1.240616798400879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7768629789352417, "epoch": 4.74, "learning_rate": 1.610916555971501e-05, "loss": 0.4074, "step": 5613, "task_loss": 0.7720212340354919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4200711250305176, "epoch": 4.75, "learning_rate": 1.610312764158918e-05, "loss": 0.7311, "step": 5614, "task_loss": 0.4565313458442688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5321477055549622, "epoch": 4.75, "learning_rate": 1.609708972346335e-05, "loss": 0.5121, "step": 5615, "task_loss": 1.1981844902038574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5908830165863037, "epoch": 4.75, "learning_rate": 1.6091051805337522e-05, "loss": 0.515, "step": 5616, "task_loss": 1.3312439918518066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42604202032089233, "epoch": 4.75, "learning_rate": 1.608501388721169e-05, "loss": 0.4352, "step": 5617, "task_loss": 1.0120296478271484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5041521191596985, "epoch": 4.75, "learning_rate": 1.607897596908586e-05, "loss": 0.4305, "step": 5618, "task_loss": 0.36833247542381287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5687348246574402, "epoch": 4.75, "learning_rate": 1.607293805096003e-05, "loss": 0.5372, "step": 5619, "task_loss": 1.5214262008666992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5037246942520142, "epoch": 4.75, "learning_rate": 1.60669001328342e-05, "loss": 0.4808, "step": 5620, "task_loss": 0.48083245754241943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.466033011674881, "epoch": 4.75, "learning_rate": 1.6060862214708368e-05, "loss": 0.5682, "step": 5621, "task_loss": 0.7249603271484375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40441203117370605, "epoch": 4.75, "learning_rate": 1.605482429658254e-05, "loss": 0.596, "step": 5622, "task_loss": 0.49672913551330566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5674706697463989, "epoch": 4.75, "learning_rate": 1.604878637845671e-05, "loss": 0.4534, "step": 5623, "task_loss": 0.5650836229324341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38541892170906067, "epoch": 4.75, "learning_rate": 1.604274846033088e-05, "loss": 0.4972, "step": 5624, "task_loss": 0.569334864616394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4885154366493225, "epoch": 4.75, "learning_rate": 1.603671054220505e-05, "loss": 0.6367, "step": 5625, "task_loss": 1.1376960277557373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5204009413719177, "epoch": 4.76, "learning_rate": 1.6030672624079217e-05, "loss": 0.4242, "step": 5626, "task_loss": 0.4900137484073639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3744223713874817, "epoch": 4.76, "learning_rate": 1.6024634705953388e-05, "loss": 0.6107, "step": 5627, "task_loss": 0.6716447472572327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5073534250259399, "epoch": 4.76, "learning_rate": 1.601859678782756e-05, "loss": 0.3355, "step": 5628, "task_loss": 0.5383113622665405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5712324380874634, "epoch": 4.76, "learning_rate": 1.6012558869701726e-05, "loss": 0.5572, "step": 5629, "task_loss": 0.4822181165218353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6921467781066895, "epoch": 4.76, "learning_rate": 1.60065209515759e-05, "loss": 0.5376, "step": 5630, "task_loss": 0.7090402245521545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6182292103767395, "epoch": 4.76, "learning_rate": 1.6000483033450067e-05, "loss": 0.6534, "step": 5631, "task_loss": 0.5375955104827881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5978118181228638, "epoch": 4.76, "learning_rate": 1.5994445115324237e-05, "loss": 0.5421, "step": 5632, "task_loss": 0.7081409096717834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30184221267700195, "epoch": 4.76, "learning_rate": 1.5988407197198408e-05, "loss": 0.3536, "step": 5633, "task_loss": 0.3410731256008148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47292661666870117, "epoch": 4.76, "learning_rate": 1.5982369279072575e-05, "loss": 0.4276, "step": 5634, "task_loss": 1.0052160024642944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3137657046318054, "epoch": 4.76, "learning_rate": 1.5976331360946746e-05, "loss": 0.396, "step": 5635, "task_loss": 0.4215075969696045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49811023473739624, "epoch": 4.76, "learning_rate": 1.5970293442820916e-05, "loss": 0.4922, "step": 5636, "task_loss": 0.48547983169555664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7490163445472717, "epoch": 4.76, "learning_rate": 1.5964255524695084e-05, "loss": 0.5161, "step": 5637, "task_loss": 0.8177903294563293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4051777124404907, "epoch": 4.77, "learning_rate": 1.5958217606569257e-05, "loss": 0.6321, "step": 5638, "task_loss": 0.6814428567886353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45394420623779297, "epoch": 4.77, "learning_rate": 1.5952179688443425e-05, "loss": 0.4648, "step": 5639, "task_loss": 0.8112760186195374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6142861843109131, "epoch": 4.77, "learning_rate": 1.5946141770317595e-05, "loss": 0.5072, "step": 5640, "task_loss": 1.8828442096710205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4361482560634613, "epoch": 4.77, "learning_rate": 1.5940103852191766e-05, "loss": 0.4997, "step": 5641, "task_loss": 0.7529924511909485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21560516953468323, "epoch": 4.77, "learning_rate": 1.5934065934065933e-05, "loss": 0.5234, "step": 5642, "task_loss": 0.21767646074295044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9339041113853455, "epoch": 4.77, "learning_rate": 1.5928028015940104e-05, "loss": 0.7083, "step": 5643, "task_loss": 1.0546343326568604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27588972449302673, "epoch": 4.77, "learning_rate": 1.5921990097814274e-05, "loss": 0.4961, "step": 5644, "task_loss": 0.8762207627296448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4512467086315155, "epoch": 4.77, "learning_rate": 1.5915952179688445e-05, "loss": 0.4759, "step": 5645, "task_loss": 0.8578436970710754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.19333422183990479, "epoch": 4.77, "learning_rate": 1.5909914261562615e-05, "loss": 0.4834, "step": 5646, "task_loss": 0.2782150208950043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2531431317329407, "epoch": 4.77, "learning_rate": 1.5903876343436782e-05, "loss": 0.6147, "step": 5647, "task_loss": 0.7841020226478577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6769366264343262, "epoch": 4.77, "learning_rate": 1.5897838425310953e-05, "loss": 0.5867, "step": 5648, "task_loss": 0.6421873569488525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4271872341632843, "epoch": 4.77, "learning_rate": 1.5891800507185124e-05, "loss": 0.6318, "step": 5649, "task_loss": 0.5544843673706055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2765531539916992, "epoch": 4.78, "learning_rate": 1.5885762589059294e-05, "loss": 0.3965, "step": 5650, "task_loss": 0.1339876651763916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24641792476177216, "epoch": 4.78, "learning_rate": 1.587972467093346e-05, "loss": 0.4109, "step": 5651, "task_loss": 0.1329040825366974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23534739017486572, "epoch": 4.78, "learning_rate": 1.5873686752807632e-05, "loss": 0.4167, "step": 5652, "task_loss": 1.1230051517486572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3777313828468323, "epoch": 4.78, "learning_rate": 1.5867648834681802e-05, "loss": 0.4568, "step": 5653, "task_loss": 0.6795060038566589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3452783524990082, "epoch": 4.78, "learning_rate": 1.5861610916555973e-05, "loss": 0.3956, "step": 5654, "task_loss": 0.22551333904266357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6292358040809631, "epoch": 4.78, "learning_rate": 1.5855572998430144e-05, "loss": 0.6387, "step": 5655, "task_loss": 0.9434925317764282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31812524795532227, "epoch": 4.78, "learning_rate": 1.584953508030431e-05, "loss": 0.5439, "step": 5656, "task_loss": 1.0697795152664185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5132414102554321, "epoch": 4.78, "learning_rate": 1.584349716217848e-05, "loss": 0.4892, "step": 5657, "task_loss": 0.2622401714324951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5250824093818665, "epoch": 4.78, "learning_rate": 1.5837459244052652e-05, "loss": 0.5517, "step": 5658, "task_loss": 1.0126469135284424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.054580569267273, "epoch": 4.78, "learning_rate": 1.583142132592682e-05, "loss": 0.5761, "step": 5659, "task_loss": 0.16966791450977325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6412950754165649, "epoch": 4.78, "learning_rate": 1.5825383407800993e-05, "loss": 0.5976, "step": 5660, "task_loss": 0.4650421142578125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.20212990045547485, "epoch": 4.78, "learning_rate": 1.581934548967516e-05, "loss": 0.4795, "step": 5661, "task_loss": 0.4432717561721802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45405086874961853, "epoch": 4.79, "learning_rate": 1.581330757154933e-05, "loss": 0.4861, "step": 5662, "task_loss": 0.09262710809707642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.641856849193573, "epoch": 4.79, "learning_rate": 1.58072696534235e-05, "loss": 0.4098, "step": 5663, "task_loss": 0.5284962058067322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5719764828681946, "epoch": 4.79, "learning_rate": 1.580123173529767e-05, "loss": 0.4695, "step": 5664, "task_loss": 0.8492804169654846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25218066573143005, "epoch": 4.79, "learning_rate": 1.5795193817171843e-05, "loss": 0.3622, "step": 5665, "task_loss": 0.7229119539260864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5369490385055542, "epoch": 4.79, "learning_rate": 1.578915589904601e-05, "loss": 0.6206, "step": 5666, "task_loss": 0.2394445538520813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27831780910491943, "epoch": 4.79, "learning_rate": 1.5783117980920177e-05, "loss": 0.4182, "step": 5667, "task_loss": 0.36029720306396484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6309812664985657, "epoch": 4.79, "learning_rate": 1.577708006279435e-05, "loss": 0.5437, "step": 5668, "task_loss": 0.5753858685493469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5308962464332581, "epoch": 4.79, "learning_rate": 1.5771042144668518e-05, "loss": 0.4824, "step": 5669, "task_loss": 0.33407866954803467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3617568612098694, "epoch": 4.79, "learning_rate": 1.576500422654269e-05, "loss": 0.4649, "step": 5670, "task_loss": 0.6683066487312317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.122196078300476, "epoch": 4.79, "learning_rate": 1.575896630841686e-05, "loss": 0.8524, "step": 5671, "task_loss": 1.0444464683532715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3187519609928131, "epoch": 4.79, "learning_rate": 1.5752928390291026e-05, "loss": 0.4193, "step": 5672, "task_loss": 0.5669804215431213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2780523896217346, "epoch": 4.79, "learning_rate": 1.57468904721652e-05, "loss": 0.4899, "step": 5673, "task_loss": 0.7641419172286987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26874101161956787, "epoch": 4.8, "learning_rate": 1.5740852554039368e-05, "loss": 0.6131, "step": 5674, "task_loss": 0.6044903993606567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29261094331741333, "epoch": 4.8, "learning_rate": 1.5734814635913538e-05, "loss": 0.5649, "step": 5675, "task_loss": 0.6579650044441223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3985699415206909, "epoch": 4.8, "learning_rate": 1.572877671778771e-05, "loss": 0.4611, "step": 5676, "task_loss": 0.3705599009990692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5954338908195496, "epoch": 4.8, "learning_rate": 1.5722738799661876e-05, "loss": 0.518, "step": 5677, "task_loss": 0.6047748923301697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6246927976608276, "epoch": 4.8, "learning_rate": 1.5716700881536046e-05, "loss": 0.582, "step": 5678, "task_loss": 0.9215129017829895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3362606465816498, "epoch": 4.8, "learning_rate": 1.5710662963410217e-05, "loss": 0.4873, "step": 5679, "task_loss": 1.6423377990722656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3829401433467865, "epoch": 4.8, "learning_rate": 1.5704625045284388e-05, "loss": 0.4536, "step": 5680, "task_loss": 1.2405848503112793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37810492515563965, "epoch": 4.8, "learning_rate": 1.5698587127158558e-05, "loss": 0.6177, "step": 5681, "task_loss": 0.7280991077423096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.719547688961029, "epoch": 4.8, "learning_rate": 1.5692549209032725e-05, "loss": 0.663, "step": 5682, "task_loss": 0.48877912759780884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2729325592517853, "epoch": 4.8, "learning_rate": 1.5686511290906896e-05, "loss": 0.4549, "step": 5683, "task_loss": 0.061100929975509644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4527157247066498, "epoch": 4.8, "learning_rate": 1.5680473372781066e-05, "loss": 0.4292, "step": 5684, "task_loss": 0.6397699117660522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3957953155040741, "epoch": 4.81, "learning_rate": 1.5674435454655237e-05, "loss": 0.4408, "step": 5685, "task_loss": 0.9481605887413025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3514915704727173, "epoch": 4.81, "learning_rate": 1.5668397536529404e-05, "loss": 0.4833, "step": 5686, "task_loss": 0.7950453758239746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2876395285129547, "epoch": 4.81, "learning_rate": 1.5662359618403575e-05, "loss": 0.3692, "step": 5687, "task_loss": 0.2596665918827057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5985586643218994, "epoch": 4.81, "learning_rate": 1.5656321700277745e-05, "loss": 0.5823, "step": 5688, "task_loss": 0.5179651379585266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7245669960975647, "epoch": 4.81, "learning_rate": 1.5650283782151916e-05, "loss": 0.471, "step": 5689, "task_loss": 1.018504023551941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4200316369533539, "epoch": 4.81, "learning_rate": 1.5644245864026087e-05, "loss": 0.4043, "step": 5690, "task_loss": 0.4716778099536896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5841517448425293, "epoch": 4.81, "learning_rate": 1.5638207945900254e-05, "loss": 0.5514, "step": 5691, "task_loss": 1.409725308418274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42265236377716064, "epoch": 4.81, "learning_rate": 1.5632170027774424e-05, "loss": 0.6344, "step": 5692, "task_loss": 1.5041499137878418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0094242095947266, "epoch": 4.81, "learning_rate": 1.5626132109648595e-05, "loss": 0.6298, "step": 5693, "task_loss": 1.2930750846862793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6513161659240723, "epoch": 4.81, "learning_rate": 1.5620094191522762e-05, "loss": 0.5954, "step": 5694, "task_loss": 0.5762127637863159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5854684114456177, "epoch": 4.81, "learning_rate": 1.5614056273396936e-05, "loss": 0.5761, "step": 5695, "task_loss": 1.0394959449768066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4219217002391815, "epoch": 4.81, "learning_rate": 1.5608018355271103e-05, "loss": 0.4843, "step": 5696, "task_loss": 0.49058088660240173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5715158581733704, "epoch": 4.82, "learning_rate": 1.5601980437145274e-05, "loss": 0.5053, "step": 5697, "task_loss": 0.7075931429862976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2913628816604614, "epoch": 4.82, "learning_rate": 1.5595942519019444e-05, "loss": 0.418, "step": 5698, "task_loss": 1.1778804063796997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5293245315551758, "epoch": 4.82, "learning_rate": 1.558990460089361e-05, "loss": 0.6095, "step": 5699, "task_loss": 1.0530405044555664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.467443585395813, "epoch": 4.82, "learning_rate": 1.5583866682767782e-05, "loss": 0.4423, "step": 5700, "task_loss": 0.5259082317352295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5770244002342224, "epoch": 4.82, "learning_rate": 1.5577828764641953e-05, "loss": 0.4712, "step": 5701, "task_loss": 0.44420406222343445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3809109032154083, "epoch": 4.82, "learning_rate": 1.557179084651612e-05, "loss": 0.4773, "step": 5702, "task_loss": 0.27997690439224243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4990959167480469, "epoch": 4.82, "learning_rate": 1.5565752928390294e-05, "loss": 0.4796, "step": 5703, "task_loss": 0.8982170224189758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9252793788909912, "epoch": 4.82, "learning_rate": 1.555971501026446e-05, "loss": 0.492, "step": 5704, "task_loss": 0.9382551312446594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6022292375564575, "epoch": 4.82, "learning_rate": 1.555367709213863e-05, "loss": 0.4542, "step": 5705, "task_loss": 0.13141119480133057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5711396932601929, "epoch": 4.82, "learning_rate": 1.5547639174012802e-05, "loss": 0.5575, "step": 5706, "task_loss": 0.6998443603515625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39606767892837524, "epoch": 4.82, "learning_rate": 1.554160125588697e-05, "loss": 0.6169, "step": 5707, "task_loss": 0.775436282157898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.716209888458252, "epoch": 4.82, "learning_rate": 1.553556333776114e-05, "loss": 0.6187, "step": 5708, "task_loss": 0.8288116455078125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6977271437644958, "epoch": 4.83, "learning_rate": 1.552952541963531e-05, "loss": 0.544, "step": 5709, "task_loss": 0.32154110074043274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5062721967697144, "epoch": 4.83, "learning_rate": 1.552348750150948e-05, "loss": 0.6182, "step": 5710, "task_loss": 0.8445614576339722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.415189266204834, "epoch": 4.83, "learning_rate": 1.551744958338365e-05, "loss": 0.504, "step": 5711, "task_loss": 0.7094087600708008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.603539764881134, "epoch": 4.83, "learning_rate": 1.551141166525782e-05, "loss": 0.5029, "step": 5712, "task_loss": 0.07858951389789581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47042083740234375, "epoch": 4.83, "learning_rate": 1.550537374713199e-05, "loss": 0.6009, "step": 5713, "task_loss": 0.8537420630455017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.437353253364563, "epoch": 4.83, "learning_rate": 1.549933582900616e-05, "loss": 0.514, "step": 5714, "task_loss": 0.22918996214866638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0176355838775635, "epoch": 4.83, "learning_rate": 1.549329791088033e-05, "loss": 0.5532, "step": 5715, "task_loss": 0.4721285402774811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36173272132873535, "epoch": 4.83, "learning_rate": 1.5487259992754498e-05, "loss": 0.581, "step": 5716, "task_loss": 0.39891916513442993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4689919948577881, "epoch": 4.83, "learning_rate": 1.5481222074628668e-05, "loss": 0.7947, "step": 5717, "task_loss": 0.5248963236808777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29008325934410095, "epoch": 4.83, "learning_rate": 1.547518415650284e-05, "loss": 0.4691, "step": 5718, "task_loss": 0.43997618556022644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5842678546905518, "epoch": 4.83, "learning_rate": 1.546914623837701e-05, "loss": 0.554, "step": 5719, "task_loss": 0.33819183707237244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.609019935131073, "epoch": 4.83, "learning_rate": 1.546310832025118e-05, "loss": 0.6015, "step": 5720, "task_loss": 0.6360526084899902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5081681609153748, "epoch": 4.84, "learning_rate": 1.5457070402125347e-05, "loss": 0.4845, "step": 5721, "task_loss": 1.3137153387069702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.287191778421402, "epoch": 4.84, "learning_rate": 1.5451032483999518e-05, "loss": 0.4253, "step": 5722, "task_loss": 0.13402938842773438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38607972860336304, "epoch": 4.84, "learning_rate": 1.5444994565873688e-05, "loss": 0.4028, "step": 5723, "task_loss": 0.103619284927845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5350126624107361, "epoch": 4.84, "learning_rate": 1.5438956647747855e-05, "loss": 0.5525, "step": 5724, "task_loss": 1.3010185956954956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5932537317276001, "epoch": 4.84, "learning_rate": 1.543291872962203e-05, "loss": 0.4499, "step": 5725, "task_loss": 0.9055598974227905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5364070534706116, "epoch": 4.84, "learning_rate": 1.5426880811496197e-05, "loss": 0.5939, "step": 5726, "task_loss": 0.5620115995407104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5883762836456299, "epoch": 4.84, "learning_rate": 1.5420842893370367e-05, "loss": 0.5198, "step": 5727, "task_loss": 1.2068294286727905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45011019706726074, "epoch": 4.84, "learning_rate": 1.5414804975244538e-05, "loss": 0.6943, "step": 5728, "task_loss": 0.19900186359882355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4132983386516571, "epoch": 4.84, "learning_rate": 1.5408767057118705e-05, "loss": 0.3591, "step": 5729, "task_loss": 0.4765997529029846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5018225312232971, "epoch": 4.84, "learning_rate": 1.5402729138992875e-05, "loss": 0.5777, "step": 5730, "task_loss": 0.49483081698417664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30368196964263916, "epoch": 4.84, "learning_rate": 1.5396691220867046e-05, "loss": 0.5301, "step": 5731, "task_loss": 0.8032054901123047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7635002136230469, "epoch": 4.84, "learning_rate": 1.5390653302741213e-05, "loss": 0.68, "step": 5732, "task_loss": 1.0857393741607666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47251012921333313, "epoch": 4.85, "learning_rate": 1.5384615384615387e-05, "loss": 0.6151, "step": 5733, "task_loss": 1.1110893487930298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36363285779953003, "epoch": 4.85, "learning_rate": 1.5378577466489554e-05, "loss": 0.3098, "step": 5734, "task_loss": 0.8031936883926392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5794199705123901, "epoch": 4.85, "learning_rate": 1.5372539548363725e-05, "loss": 0.516, "step": 5735, "task_loss": 1.2402923107147217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1697910875082016, "epoch": 4.85, "learning_rate": 1.5366501630237896e-05, "loss": 0.4513, "step": 5736, "task_loss": 0.2355843037366867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28572651743888855, "epoch": 4.85, "learning_rate": 1.5360463712112063e-05, "loss": 0.4089, "step": 5737, "task_loss": 0.22135579586029053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6795411109924316, "epoch": 4.85, "learning_rate": 1.5354425793986237e-05, "loss": 0.6523, "step": 5738, "task_loss": 0.8354735970497131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6474465131759644, "epoch": 4.85, "learning_rate": 1.5348387875860404e-05, "loss": 0.5865, "step": 5739, "task_loss": 0.654369056224823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5395057201385498, "epoch": 4.85, "learning_rate": 1.534234995773457e-05, "loss": 0.5646, "step": 5740, "task_loss": 0.4729509651660919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3919467329978943, "epoch": 4.85, "learning_rate": 1.5336312039608745e-05, "loss": 0.427, "step": 5741, "task_loss": 0.8063421845436096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.440171480178833, "epoch": 4.85, "learning_rate": 1.5330274121482912e-05, "loss": 0.4307, "step": 5742, "task_loss": 0.759330153465271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6499356627464294, "epoch": 4.85, "learning_rate": 1.5324236203357083e-05, "loss": 0.5415, "step": 5743, "task_loss": 0.21940068900585175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9460481405258179, "epoch": 4.85, "learning_rate": 1.5318198285231253e-05, "loss": 0.5853, "step": 5744, "task_loss": 1.0863806009292603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3309619426727295, "epoch": 4.86, "learning_rate": 1.531216036710542e-05, "loss": 0.5056, "step": 5745, "task_loss": 0.7600712776184082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4350525140762329, "epoch": 4.86, "learning_rate": 1.5306122448979594e-05, "loss": 0.4527, "step": 5746, "task_loss": 1.0556528568267822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34537726640701294, "epoch": 4.86, "learning_rate": 1.530008453085376e-05, "loss": 0.5393, "step": 5747, "task_loss": 0.7721340656280518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5988664031028748, "epoch": 4.86, "learning_rate": 1.5294046612727932e-05, "loss": 0.5007, "step": 5748, "task_loss": 0.9986903667449951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7229220867156982, "epoch": 4.86, "learning_rate": 1.5288008694602103e-05, "loss": 0.6758, "step": 5749, "task_loss": 1.260180950164795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4934806227684021, "epoch": 4.86, "learning_rate": 1.528197077647627e-05, "loss": 0.5332, "step": 5750, "task_loss": 0.5637091398239136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6755383014678955, "epoch": 4.86, "learning_rate": 1.527593285835044e-05, "loss": 0.6149, "step": 5751, "task_loss": 0.5080583095550537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4170818328857422, "epoch": 4.86, "learning_rate": 1.526989494022461e-05, "loss": 0.6308, "step": 5752, "task_loss": 1.5215613842010498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35623130202293396, "epoch": 4.86, "learning_rate": 1.526385702209878e-05, "loss": 0.4771, "step": 5753, "task_loss": 0.24386626482009888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4084607660770416, "epoch": 4.86, "learning_rate": 1.525781910397295e-05, "loss": 0.4323, "step": 5754, "task_loss": 0.7456134557723999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4132385551929474, "epoch": 4.86, "learning_rate": 1.525178118584712e-05, "loss": 0.4349, "step": 5755, "task_loss": 1.4611207246780396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46655547618865967, "epoch": 4.87, "learning_rate": 1.524574326772129e-05, "loss": 0.4404, "step": 5756, "task_loss": 0.8669162392616272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5696276426315308, "epoch": 4.87, "learning_rate": 1.5239705349595459e-05, "loss": 0.369, "step": 5757, "task_loss": 0.17867420613765717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40233302116394043, "epoch": 4.87, "learning_rate": 1.5233667431469631e-05, "loss": 0.4699, "step": 5758, "task_loss": 0.7311951518058777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7507137060165405, "epoch": 4.87, "learning_rate": 1.52276295133438e-05, "loss": 0.5608, "step": 5759, "task_loss": 1.178305745124817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3480607271194458, "epoch": 4.87, "learning_rate": 1.5221591595217969e-05, "loss": 0.5158, "step": 5760, "task_loss": 0.13337811827659607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5539226531982422, "epoch": 4.87, "learning_rate": 1.521555367709214e-05, "loss": 0.6271, "step": 5761, "task_loss": 0.17125360667705536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3317650258541107, "epoch": 4.87, "learning_rate": 1.5209515758966308e-05, "loss": 0.3665, "step": 5762, "task_loss": 0.22879157960414886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45853376388549805, "epoch": 4.87, "learning_rate": 1.520347784084048e-05, "loss": 0.4243, "step": 5763, "task_loss": 0.22903874516487122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.539421796798706, "epoch": 4.87, "learning_rate": 1.5197439922714648e-05, "loss": 0.5323, "step": 5764, "task_loss": 0.7340229153633118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48800137639045715, "epoch": 4.87, "learning_rate": 1.5191402004588817e-05, "loss": 0.4599, "step": 5765, "task_loss": 0.4793279469013214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5754932761192322, "epoch": 4.87, "learning_rate": 1.5185364086462989e-05, "loss": 0.4746, "step": 5766, "task_loss": 1.3344985246658325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4941015839576721, "epoch": 4.87, "learning_rate": 1.5179326168337158e-05, "loss": 0.4667, "step": 5767, "task_loss": 0.7423571348190308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5405023694038391, "epoch": 4.88, "learning_rate": 1.5173288250211328e-05, "loss": 0.5102, "step": 5768, "task_loss": 1.2611517906188965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4547557830810547, "epoch": 4.88, "learning_rate": 1.5167250332085497e-05, "loss": 0.572, "step": 5769, "task_loss": 0.2531443238258362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44711682200431824, "epoch": 4.88, "learning_rate": 1.5161212413959666e-05, "loss": 0.452, "step": 5770, "task_loss": 0.41396364569664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28641533851623535, "epoch": 4.88, "learning_rate": 1.5155174495833838e-05, "loss": 0.4711, "step": 5771, "task_loss": 0.7494364380836487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5031849145889282, "epoch": 4.88, "learning_rate": 1.5149136577708006e-05, "loss": 0.4776, "step": 5772, "task_loss": 1.0330506563186646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5102938413619995, "epoch": 4.88, "learning_rate": 1.5143098659582178e-05, "loss": 0.5076, "step": 5773, "task_loss": 0.7059791088104248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4198932647705078, "epoch": 4.88, "learning_rate": 1.5137060741456347e-05, "loss": 0.4032, "step": 5774, "task_loss": 0.5342229604721069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.283042848110199, "epoch": 4.88, "learning_rate": 1.5131022823330516e-05, "loss": 0.3216, "step": 5775, "task_loss": 0.49188709259033203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3742866814136505, "epoch": 4.88, "learning_rate": 1.5124984905204686e-05, "loss": 0.3684, "step": 5776, "task_loss": 0.9383265376091003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3816741704940796, "epoch": 4.88, "learning_rate": 1.5118946987078855e-05, "loss": 0.4913, "step": 5777, "task_loss": 0.5717141628265381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5413243174552917, "epoch": 4.88, "learning_rate": 1.5112909068953027e-05, "loss": 0.4815, "step": 5778, "task_loss": 1.2612712383270264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5110476016998291, "epoch": 4.88, "learning_rate": 1.5106871150827196e-05, "loss": 0.5065, "step": 5779, "task_loss": 0.7782415747642517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5100208520889282, "epoch": 4.89, "learning_rate": 1.5100833232701363e-05, "loss": 0.3971, "step": 5780, "task_loss": 1.1203153133392334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5307128429412842, "epoch": 4.89, "learning_rate": 1.5094795314575536e-05, "loss": 0.4121, "step": 5781, "task_loss": 1.0690267086029053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48200416564941406, "epoch": 4.89, "learning_rate": 1.5088757396449705e-05, "loss": 0.533, "step": 5782, "task_loss": 0.43080538511276245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3921981155872345, "epoch": 4.89, "learning_rate": 1.5082719478323875e-05, "loss": 0.4993, "step": 5783, "task_loss": 0.9054892659187317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48124080896377563, "epoch": 4.89, "learning_rate": 1.5076681560198044e-05, "loss": 0.3361, "step": 5784, "task_loss": 0.9808400273323059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.62503981590271, "epoch": 4.89, "learning_rate": 1.5070643642072213e-05, "loss": 0.4855, "step": 5785, "task_loss": 1.0295519828796387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.832899808883667, "epoch": 4.89, "learning_rate": 1.5064605723946385e-05, "loss": 0.5432, "step": 5786, "task_loss": 0.9598299264907837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.1975598335266113, "epoch": 4.89, "learning_rate": 1.5058567805820554e-05, "loss": 0.6281, "step": 5787, "task_loss": 0.6961100697517395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30831629037857056, "epoch": 4.89, "learning_rate": 1.5052529887694725e-05, "loss": 0.5169, "step": 5788, "task_loss": 0.7045948505401611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48360204696655273, "epoch": 4.89, "learning_rate": 1.5046491969568893e-05, "loss": 0.4486, "step": 5789, "task_loss": 0.598354697227478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3582944869995117, "epoch": 4.89, "learning_rate": 1.5040454051443062e-05, "loss": 0.5294, "step": 5790, "task_loss": 0.5948873162269592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3131830394268036, "epoch": 4.89, "learning_rate": 1.5034416133317233e-05, "loss": 0.3463, "step": 5791, "task_loss": 0.2515471577644348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4120029807090759, "epoch": 4.9, "learning_rate": 1.5028378215191402e-05, "loss": 0.3721, "step": 5792, "task_loss": 0.18850304186344147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24460455775260925, "epoch": 4.9, "learning_rate": 1.5022340297065574e-05, "loss": 0.4778, "step": 5793, "task_loss": 1.0462384223937988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41444817185401917, "epoch": 4.9, "learning_rate": 1.5016302378939743e-05, "loss": 0.5523, "step": 5794, "task_loss": 1.0514357089996338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6986926198005676, "epoch": 4.9, "learning_rate": 1.501026446081391e-05, "loss": 0.4164, "step": 5795, "task_loss": 0.21206903457641602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46723219752311707, "epoch": 4.9, "learning_rate": 1.5004226542688082e-05, "loss": 0.4921, "step": 5796, "task_loss": 0.3758774697780609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9450217485427856, "epoch": 4.9, "learning_rate": 1.4998188624562251e-05, "loss": 0.5995, "step": 5797, "task_loss": 0.6682865619659424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40183451771736145, "epoch": 4.9, "learning_rate": 1.4992150706436422e-05, "loss": 0.5237, "step": 5798, "task_loss": 0.8499565124511719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39154326915740967, "epoch": 4.9, "learning_rate": 1.498611278831059e-05, "loss": 0.4685, "step": 5799, "task_loss": 0.4589754045009613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4009566307067871, "epoch": 4.9, "learning_rate": 1.498007487018476e-05, "loss": 0.4869, "step": 5800, "task_loss": 0.5028212666511536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6072397232055664, "epoch": 4.9, "learning_rate": 1.4974036952058932e-05, "loss": 0.5621, "step": 5801, "task_loss": 1.2244374752044678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.706192672252655, "epoch": 4.9, "learning_rate": 1.49679990339331e-05, "loss": 0.4988, "step": 5802, "task_loss": 1.1101514101028442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36713117361068726, "epoch": 4.9, "learning_rate": 1.4961961115807271e-05, "loss": 0.4911, "step": 5803, "task_loss": 0.5822126865386963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42061156034469604, "epoch": 4.91, "learning_rate": 1.495592319768144e-05, "loss": 0.6164, "step": 5804, "task_loss": 0.8172554969787598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7587139010429382, "epoch": 4.91, "learning_rate": 1.4949885279555609e-05, "loss": 0.4922, "step": 5805, "task_loss": 0.596482515335083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4406343698501587, "epoch": 4.91, "learning_rate": 1.494384736142978e-05, "loss": 0.4552, "step": 5806, "task_loss": 0.7844243049621582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2350749969482422, "epoch": 4.91, "learning_rate": 1.4937809443303948e-05, "loss": 0.3486, "step": 5807, "task_loss": 0.4691348373889923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2896985113620758, "epoch": 4.91, "learning_rate": 1.493177152517812e-05, "loss": 0.6549, "step": 5808, "task_loss": 0.35225725173950195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6432271003723145, "epoch": 4.91, "learning_rate": 1.492573360705229e-05, "loss": 0.4885, "step": 5809, "task_loss": 1.1570217609405518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3027561902999878, "epoch": 4.91, "learning_rate": 1.4919695688926458e-05, "loss": 0.4474, "step": 5810, "task_loss": 1.0662912130355835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40217119455337524, "epoch": 4.91, "learning_rate": 1.4913657770800629e-05, "loss": 0.5823, "step": 5811, "task_loss": 0.7301077842712402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47895097732543945, "epoch": 4.91, "learning_rate": 1.4907619852674798e-05, "loss": 0.452, "step": 5812, "task_loss": 0.3648097813129425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31992220878601074, "epoch": 4.91, "learning_rate": 1.4901581934548968e-05, "loss": 0.4001, "step": 5813, "task_loss": 0.7621862888336182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5665308833122253, "epoch": 4.91, "learning_rate": 1.4895544016423137e-05, "loss": 0.5896, "step": 5814, "task_loss": 0.5625012516975403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41491222381591797, "epoch": 4.91, "learning_rate": 1.4889506098297306e-05, "loss": 0.4595, "step": 5815, "task_loss": 0.9698602557182312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3085525333881378, "epoch": 4.92, "learning_rate": 1.4883468180171479e-05, "loss": 0.4151, "step": 5816, "task_loss": 0.9177283048629761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3345073461532593, "epoch": 4.92, "learning_rate": 1.4877430262045647e-05, "loss": 0.3756, "step": 5817, "task_loss": 0.6464024186134338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3889596462249756, "epoch": 4.92, "learning_rate": 1.4871392343919818e-05, "loss": 0.4763, "step": 5818, "task_loss": 0.5566734671592712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5989587903022766, "epoch": 4.92, "learning_rate": 1.4865354425793987e-05, "loss": 0.4554, "step": 5819, "task_loss": 0.12948866188526154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3671024739742279, "epoch": 4.92, "learning_rate": 1.4859316507668156e-05, "loss": 0.5576, "step": 5820, "task_loss": 0.43852099776268005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5567774772644043, "epoch": 4.92, "learning_rate": 1.4853278589542326e-05, "loss": 0.471, "step": 5821, "task_loss": 1.3301247358322144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34113407135009766, "epoch": 4.92, "learning_rate": 1.4847240671416495e-05, "loss": 0.458, "step": 5822, "task_loss": 0.978023111820221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7804850935935974, "epoch": 4.92, "learning_rate": 1.4841202753290667e-05, "loss": 0.5817, "step": 5823, "task_loss": 1.078391671180725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6289469003677368, "epoch": 4.92, "learning_rate": 1.4835164835164836e-05, "loss": 0.4885, "step": 5824, "task_loss": 0.5521443486213684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4120987355709076, "epoch": 4.92, "learning_rate": 1.4829126917039005e-05, "loss": 0.4166, "step": 5825, "task_loss": 0.27023211121559143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7844724655151367, "epoch": 4.92, "learning_rate": 1.4823088998913176e-05, "loss": 0.5908, "step": 5826, "task_loss": 1.4410432577133179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30639687180519104, "epoch": 4.93, "learning_rate": 1.4817051080787345e-05, "loss": 0.4905, "step": 5827, "task_loss": 0.437218576669693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2257484644651413, "epoch": 4.93, "learning_rate": 1.4811013162661517e-05, "loss": 0.3364, "step": 5828, "task_loss": 0.17178605496883392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38050517439842224, "epoch": 4.93, "learning_rate": 1.4804975244535684e-05, "loss": 0.5635, "step": 5829, "task_loss": 1.1554322242736816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6037095189094543, "epoch": 4.93, "learning_rate": 1.4798937326409853e-05, "loss": 0.4597, "step": 5830, "task_loss": 0.19461466372013092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6499226093292236, "epoch": 4.93, "learning_rate": 1.4792899408284025e-05, "loss": 0.5116, "step": 5831, "task_loss": 0.6344338655471802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6488349437713623, "epoch": 4.93, "learning_rate": 1.4786861490158194e-05, "loss": 0.4408, "step": 5832, "task_loss": 0.9087602496147156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4854084551334381, "epoch": 4.93, "learning_rate": 1.4780823572032365e-05, "loss": 0.4642, "step": 5833, "task_loss": 0.3827601671218872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3597487807273865, "epoch": 4.93, "learning_rate": 1.4774785653906534e-05, "loss": 0.4407, "step": 5834, "task_loss": 0.28483617305755615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41044533252716064, "epoch": 4.93, "learning_rate": 1.4768747735780702e-05, "loss": 0.4847, "step": 5835, "task_loss": 0.532178521156311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5367692708969116, "epoch": 4.93, "learning_rate": 1.4762709817654875e-05, "loss": 0.4745, "step": 5836, "task_loss": 0.9414312243461609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32648587226867676, "epoch": 4.93, "learning_rate": 1.4756671899529042e-05, "loss": 0.4396, "step": 5837, "task_loss": 0.48526817560195923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5288338661193848, "epoch": 4.93, "learning_rate": 1.4750633981403214e-05, "loss": 0.5427, "step": 5838, "task_loss": 1.1049845218658447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3779226243495941, "epoch": 4.94, "learning_rate": 1.4744596063277383e-05, "loss": 0.4833, "step": 5839, "task_loss": 0.2531374990940094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30419260263442993, "epoch": 4.94, "learning_rate": 1.4738558145151552e-05, "loss": 0.344, "step": 5840, "task_loss": 0.20573073625564575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47613024711608887, "epoch": 4.94, "learning_rate": 1.4732520227025722e-05, "loss": 0.4087, "step": 5841, "task_loss": 0.5432329177856445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4031655490398407, "epoch": 4.94, "learning_rate": 1.4726482308899891e-05, "loss": 0.4612, "step": 5842, "task_loss": 0.26833173632621765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4328988194465637, "epoch": 4.94, "learning_rate": 1.4720444390774064e-05, "loss": 0.5883, "step": 5843, "task_loss": 0.5638561844825745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6232972145080566, "epoch": 4.94, "learning_rate": 1.4714406472648232e-05, "loss": 0.5523, "step": 5844, "task_loss": 0.9876930713653564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2712046802043915, "epoch": 4.94, "learning_rate": 1.47083685545224e-05, "loss": 0.3715, "step": 5845, "task_loss": 0.03049248829483986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5060746073722839, "epoch": 4.94, "learning_rate": 1.4702330636396572e-05, "loss": 0.4787, "step": 5846, "task_loss": 0.6439326405525208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3097953200340271, "epoch": 4.94, "learning_rate": 1.469629271827074e-05, "loss": 0.449, "step": 5847, "task_loss": 0.16452401876449585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8650487661361694, "epoch": 4.94, "learning_rate": 1.4690254800144911e-05, "loss": 0.5764, "step": 5848, "task_loss": 1.339842438697815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6167279481887817, "epoch": 4.94, "learning_rate": 1.468421688201908e-05, "loss": 0.5044, "step": 5849, "task_loss": 0.6929930448532104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3980950117111206, "epoch": 4.94, "learning_rate": 1.4678178963893249e-05, "loss": 0.5074, "step": 5850, "task_loss": 0.8631390333175659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3490990996360779, "epoch": 4.95, "learning_rate": 1.4672141045767421e-05, "loss": 0.4935, "step": 5851, "task_loss": 0.270679235458374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3136811852455139, "epoch": 4.95, "learning_rate": 1.466610312764159e-05, "loss": 0.6327, "step": 5852, "task_loss": 1.1166059970855713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3603687882423401, "epoch": 4.95, "learning_rate": 1.466006520951576e-05, "loss": 0.4278, "step": 5853, "task_loss": 0.2023213654756546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25370514392852783, "epoch": 4.95, "learning_rate": 1.465402729138993e-05, "loss": 0.5277, "step": 5854, "task_loss": 0.7227314114570618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3278053402900696, "epoch": 4.95, "learning_rate": 1.4647989373264099e-05, "loss": 0.5141, "step": 5855, "task_loss": 0.2890128195285797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6116062998771667, "epoch": 4.95, "learning_rate": 1.464195145513827e-05, "loss": 0.5481, "step": 5856, "task_loss": 0.4133390486240387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5500404834747314, "epoch": 4.95, "learning_rate": 1.4635913537012438e-05, "loss": 0.4849, "step": 5857, "task_loss": 0.6949512958526611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5884612798690796, "epoch": 4.95, "learning_rate": 1.462987561888661e-05, "loss": 0.512, "step": 5858, "task_loss": 1.1272770166397095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4891970753669739, "epoch": 4.95, "learning_rate": 1.462383770076078e-05, "loss": 0.6648, "step": 5859, "task_loss": 0.7570291757583618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3876471221446991, "epoch": 4.95, "learning_rate": 1.4617799782634946e-05, "loss": 0.3473, "step": 5860, "task_loss": 0.3063424825668335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3969310522079468, "epoch": 4.95, "learning_rate": 1.4611761864509119e-05, "loss": 0.6064, "step": 5861, "task_loss": 0.6238088607788086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29979634284973145, "epoch": 4.95, "learning_rate": 1.4605723946383288e-05, "loss": 0.5242, "step": 5862, "task_loss": 0.06515660136938095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37218183279037476, "epoch": 4.96, "learning_rate": 1.4599686028257458e-05, "loss": 0.4648, "step": 5863, "task_loss": 0.152825266122818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2802339494228363, "epoch": 4.96, "learning_rate": 1.4593648110131627e-05, "loss": 0.5194, "step": 5864, "task_loss": 0.3493371307849884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4369129240512848, "epoch": 4.96, "learning_rate": 1.4587610192005796e-05, "loss": 0.601, "step": 5865, "task_loss": 1.2810916900634766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4378892183303833, "epoch": 4.96, "learning_rate": 1.4581572273879968e-05, "loss": 0.4517, "step": 5866, "task_loss": 0.4537532925605774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4011518955230713, "epoch": 4.96, "learning_rate": 1.4575534355754137e-05, "loss": 0.5105, "step": 5867, "task_loss": 1.0126134157180786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4999954104423523, "epoch": 4.96, "learning_rate": 1.4569496437628308e-05, "loss": 0.5696, "step": 5868, "task_loss": 0.691393256187439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5981533527374268, "epoch": 4.96, "learning_rate": 1.4563458519502476e-05, "loss": 0.547, "step": 5869, "task_loss": 0.8819162845611572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3164193630218506, "epoch": 4.96, "learning_rate": 1.4557420601376645e-05, "loss": 0.4389, "step": 5870, "task_loss": 1.1230006217956543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40841490030288696, "epoch": 4.96, "learning_rate": 1.4551382683250816e-05, "loss": 0.3507, "step": 5871, "task_loss": 0.15326306223869324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5229266881942749, "epoch": 4.96, "learning_rate": 1.4545344765124985e-05, "loss": 0.5676, "step": 5872, "task_loss": 0.5109989047050476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6146092414855957, "epoch": 4.96, "learning_rate": 1.4539306846999157e-05, "loss": 0.3857, "step": 5873, "task_loss": 0.47085052728652954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31163936853408813, "epoch": 4.96, "learning_rate": 1.4533268928873326e-05, "loss": 0.4197, "step": 5874, "task_loss": 0.30774977803230286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2735847234725952, "epoch": 4.97, "learning_rate": 1.4527231010747495e-05, "loss": 0.4672, "step": 5875, "task_loss": 0.6831485629081726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5137497186660767, "epoch": 4.97, "learning_rate": 1.4521193092621665e-05, "loss": 0.4959, "step": 5876, "task_loss": 0.1318991482257843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24189987778663635, "epoch": 4.97, "learning_rate": 1.4515155174495834e-05, "loss": 0.4358, "step": 5877, "task_loss": 0.8447002172470093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2813869118690491, "epoch": 4.97, "learning_rate": 1.4509117256370005e-05, "loss": 0.5095, "step": 5878, "task_loss": 0.4457683265209198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8438689708709717, "epoch": 4.97, "learning_rate": 1.4503079338244174e-05, "loss": 0.763, "step": 5879, "task_loss": 1.0650538206100464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3680497407913208, "epoch": 4.97, "learning_rate": 1.4497041420118343e-05, "loss": 0.4162, "step": 5880, "task_loss": 0.3631020486354828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6185603737831116, "epoch": 4.97, "learning_rate": 1.4491003501992515e-05, "loss": 0.6073, "step": 5881, "task_loss": 1.1768121719360352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3132491111755371, "epoch": 4.97, "learning_rate": 1.4484965583866684e-05, "loss": 0.4332, "step": 5882, "task_loss": 0.6072292327880859 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.451108455657959, "epoch": 4.97, "learning_rate": 1.4478927665740854e-05, "loss": 0.4479, "step": 5883, "task_loss": 0.9165635704994202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5627396106719971, "epoch": 4.97, "learning_rate": 1.4472889747615023e-05, "loss": 0.572, "step": 5884, "task_loss": 0.39646631479263306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8555253744125366, "epoch": 4.97, "learning_rate": 1.4466851829489192e-05, "loss": 0.5001, "step": 5885, "task_loss": 0.6878842711448669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5245431661605835, "epoch": 4.97, "learning_rate": 1.4460813911363363e-05, "loss": 0.4436, "step": 5886, "task_loss": 0.6413938403129578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.19140513241291046, "epoch": 4.98, "learning_rate": 1.4454775993237531e-05, "loss": 0.2676, "step": 5887, "task_loss": 0.13698330521583557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.957083523273468, "epoch": 4.98, "learning_rate": 1.4448738075111704e-05, "loss": 0.5191, "step": 5888, "task_loss": 0.8026174306869507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.264891654253006, "epoch": 4.98, "learning_rate": 1.4442700156985873e-05, "loss": 0.3094, "step": 5889, "task_loss": 0.30313175916671753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5777595043182373, "epoch": 4.98, "learning_rate": 1.4436662238860041e-05, "loss": 0.4691, "step": 5890, "task_loss": 0.6727564334869385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.13674266636371613, "epoch": 4.98, "learning_rate": 1.4430624320734212e-05, "loss": 0.379, "step": 5891, "task_loss": 0.27789682149887085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5818727612495422, "epoch": 4.98, "learning_rate": 1.4424586402608381e-05, "loss": 0.6628, "step": 5892, "task_loss": 0.6469494700431824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5610709190368652, "epoch": 4.98, "learning_rate": 1.4418548484482553e-05, "loss": 0.5601, "step": 5893, "task_loss": 0.2761783301830292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6625365018844604, "epoch": 4.98, "learning_rate": 1.441251056635672e-05, "loss": 0.4939, "step": 5894, "task_loss": 1.0583909749984741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.581427276134491, "epoch": 4.98, "learning_rate": 1.440647264823089e-05, "loss": 0.5764, "step": 5895, "task_loss": 0.6895261406898499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6571460366249084, "epoch": 4.98, "learning_rate": 1.4400434730105062e-05, "loss": 0.5062, "step": 5896, "task_loss": 0.74186110496521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4037855267524719, "epoch": 4.98, "learning_rate": 1.439439681197923e-05, "loss": 0.444, "step": 5897, "task_loss": 0.5062740445137024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28357407450675964, "epoch": 4.99, "learning_rate": 1.4388358893853401e-05, "loss": 0.3359, "step": 5898, "task_loss": 0.23819175362586975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4217330813407898, "epoch": 4.99, "learning_rate": 1.438232097572757e-05, "loss": 0.4246, "step": 5899, "task_loss": 0.6964184641838074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9314852356910706, "epoch": 4.99, "learning_rate": 1.4376283057601739e-05, "loss": 0.6022, "step": 5900, "task_loss": 0.9725218415260315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38695868849754333, "epoch": 4.99, "learning_rate": 1.4370245139475911e-05, "loss": 0.3766, "step": 5901, "task_loss": 0.6227385401725769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6097522974014282, "epoch": 4.99, "learning_rate": 1.4364207221350078e-05, "loss": 0.5886, "step": 5902, "task_loss": 0.8588147759437561 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8629091382026672, "epoch": 4.99, "learning_rate": 1.4358169303224247e-05, "loss": 0.6821, "step": 5903, "task_loss": 0.5130578279495239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5736752152442932, "epoch": 4.99, "learning_rate": 1.435213138509842e-05, "loss": 0.4862, "step": 5904, "task_loss": 0.5569455027580261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.645703911781311, "epoch": 4.99, "learning_rate": 1.4346093466972588e-05, "loss": 0.5298, "step": 5905, "task_loss": 0.5157227516174316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37729692459106445, "epoch": 4.99, "learning_rate": 1.4340055548846759e-05, "loss": 0.4047, "step": 5906, "task_loss": 0.5590080618858337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5275990962982178, "epoch": 4.99, "learning_rate": 1.4334017630720928e-05, "loss": 0.6381, "step": 5907, "task_loss": 0.979600191116333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5278711318969727, "epoch": 4.99, "learning_rate": 1.4327979712595097e-05, "loss": 0.4737, "step": 5908, "task_loss": 0.5000836253166199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37122756242752075, "epoch": 4.99, "learning_rate": 1.4321941794469269e-05, "loss": 0.5042, "step": 5909, "task_loss": 1.2997733354568481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6282119750976562, "epoch": 5.0, "learning_rate": 1.4315903876343436e-05, "loss": 0.5548, "step": 5910, "task_loss": 0.7074734568595886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4617100954055786, "epoch": 5.0, "learning_rate": 1.4309865958217608e-05, "loss": 0.3705, "step": 5911, "task_loss": 0.8815626502037048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45544254779815674, "epoch": 5.0, "learning_rate": 1.4303828040091777e-05, "loss": 0.5654, "step": 5912, "task_loss": 0.22035664319992065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.600818395614624, "epoch": 5.0, "learning_rate": 1.4297790121965946e-05, "loss": 0.5078, "step": 5913, "task_loss": 0.3404569923877716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8749569654464722, "epoch": 5.0, "learning_rate": 1.4291752203840117e-05, "loss": 0.534, "step": 5914, "task_loss": 0.820824384689331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.252996027469635, "epoch": 5.0, "learning_rate": 1.4285714285714285e-05, "loss": 0.4369, "step": 5915, "task_loss": 0.5022522807121277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3851630389690399, "epoch": 5.0, "learning_rate": 1.4279676367588458e-05, "loss": 0.8312, "step": 5916, "task_loss": 0.5981258749961853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5213533639907837, "epoch": 5.0, "learning_rate": 1.4273638449462627e-05, "loss": 0.4492, "step": 5917, "task_loss": 0.5999496579170227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7086391448974609, "epoch": 5.0, "learning_rate": 1.4267600531336794e-05, "loss": 0.5934, "step": 5918, "task_loss": 0.9338732361793518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5655124187469482, "epoch": 5.0, "learning_rate": 1.4261562613210966e-05, "loss": 0.4947, "step": 5919, "task_loss": 0.6104177832603455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.824927806854248, "epoch": 5.0, "learning_rate": 1.4255524695085135e-05, "loss": 0.5232, "step": 5920, "task_loss": 0.8307041525840759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7413452863693237, "epoch": 5.01, "learning_rate": 1.4249486776959305e-05, "loss": 0.5431, "step": 5921, "task_loss": 0.5468769669532776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39320409297943115, "epoch": 5.01, "learning_rate": 1.4243448858833474e-05, "loss": 0.4708, "step": 5922, "task_loss": 0.7387615442276001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.79927659034729, "epoch": 5.01, "learning_rate": 1.4237410940707643e-05, "loss": 0.6666, "step": 5923, "task_loss": 0.6731265187263489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36455684900283813, "epoch": 5.01, "learning_rate": 1.4231373022581815e-05, "loss": 0.4146, "step": 5924, "task_loss": 0.5640330910682678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5359421968460083, "epoch": 5.01, "learning_rate": 1.4225335104455983e-05, "loss": 0.4275, "step": 5925, "task_loss": 0.505700409412384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.17871074378490448, "epoch": 5.01, "learning_rate": 1.4219297186330155e-05, "loss": 0.4018, "step": 5926, "task_loss": 0.8501909375190735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4953292906284332, "epoch": 5.01, "learning_rate": 1.4213259268204324e-05, "loss": 0.4543, "step": 5927, "task_loss": 1.4153022766113281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4085580110549927, "epoch": 5.01, "learning_rate": 1.4207221350078493e-05, "loss": 0.4821, "step": 5928, "task_loss": 0.08659082651138306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5490870475769043, "epoch": 5.01, "learning_rate": 1.4201183431952663e-05, "loss": 0.4103, "step": 5929, "task_loss": 0.20959487557411194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4751938581466675, "epoch": 5.01, "learning_rate": 1.4195145513826832e-05, "loss": 0.4259, "step": 5930, "task_loss": 0.21572832763195038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5338667035102844, "epoch": 5.01, "learning_rate": 1.4189107595701004e-05, "loss": 0.5098, "step": 5931, "task_loss": 1.6079308986663818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25754159688949585, "epoch": 5.01, "learning_rate": 1.4183069677575173e-05, "loss": 0.3872, "step": 5932, "task_loss": 0.5583166480064392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3172677755355835, "epoch": 5.02, "learning_rate": 1.417703175944934e-05, "loss": 0.3944, "step": 5933, "task_loss": 1.0343488454818726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34228354692459106, "epoch": 5.02, "learning_rate": 1.4170993841323513e-05, "loss": 0.4722, "step": 5934, "task_loss": 0.49838778376579285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4840555489063263, "epoch": 5.02, "learning_rate": 1.4164955923197682e-05, "loss": 0.4237, "step": 5935, "task_loss": 0.5111437439918518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3206411302089691, "epoch": 5.02, "learning_rate": 1.4158918005071852e-05, "loss": 0.4337, "step": 5936, "task_loss": 0.1432463526725769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4909075200557709, "epoch": 5.02, "learning_rate": 1.4152880086946021e-05, "loss": 0.4344, "step": 5937, "task_loss": 1.1542472839355469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4109368622303009, "epoch": 5.02, "learning_rate": 1.414684216882019e-05, "loss": 0.5097, "step": 5938, "task_loss": 0.29364532232284546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7045813798904419, "epoch": 5.02, "learning_rate": 1.4140804250694362e-05, "loss": 0.5035, "step": 5939, "task_loss": 1.4589147567749023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5719457864761353, "epoch": 5.02, "learning_rate": 1.4134766332568531e-05, "loss": 0.5066, "step": 5940, "task_loss": 0.5735933184623718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6096122860908508, "epoch": 5.02, "learning_rate": 1.4128728414442702e-05, "loss": 0.4796, "step": 5941, "task_loss": 1.2086384296417236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6042701005935669, "epoch": 5.02, "learning_rate": 1.412269049631687e-05, "loss": 0.4657, "step": 5942, "task_loss": 0.4611886441707611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5211130380630493, "epoch": 5.02, "learning_rate": 1.411665257819104e-05, "loss": 0.5548, "step": 5943, "task_loss": 0.8150731325149536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.285304456949234, "epoch": 5.02, "learning_rate": 1.411061466006521e-05, "loss": 0.4676, "step": 5944, "task_loss": 0.6937703490257263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6477197408676147, "epoch": 5.03, "learning_rate": 1.4104576741939379e-05, "loss": 0.4886, "step": 5945, "task_loss": 0.523460865020752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.638511061668396, "epoch": 5.03, "learning_rate": 1.4098538823813551e-05, "loss": 0.3574, "step": 5946, "task_loss": 0.2648000121116638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3840251863002777, "epoch": 5.03, "learning_rate": 1.409250090568772e-05, "loss": 0.6735, "step": 5947, "task_loss": 0.5291072726249695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36865752935409546, "epoch": 5.03, "learning_rate": 1.4086462987561889e-05, "loss": 0.4441, "step": 5948, "task_loss": 1.163393259048462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40287458896636963, "epoch": 5.03, "learning_rate": 1.408042506943606e-05, "loss": 0.4299, "step": 5949, "task_loss": 0.2987561523914337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35082125663757324, "epoch": 5.03, "learning_rate": 1.4074387151310228e-05, "loss": 0.3744, "step": 5950, "task_loss": 0.3797934353351593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38333818316459656, "epoch": 5.03, "learning_rate": 1.4068349233184399e-05, "loss": 0.5009, "step": 5951, "task_loss": 0.8500578999519348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7343951463699341, "epoch": 5.03, "learning_rate": 1.4062311315058568e-05, "loss": 0.4746, "step": 5952, "task_loss": 0.3409634232521057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43828216195106506, "epoch": 5.03, "learning_rate": 1.4056273396932737e-05, "loss": 0.3911, "step": 5953, "task_loss": 1.0307278633117676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3163219690322876, "epoch": 5.03, "learning_rate": 1.4050235478806909e-05, "loss": 0.3713, "step": 5954, "task_loss": 0.18032242357730865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8195264339447021, "epoch": 5.03, "learning_rate": 1.4044197560681078e-05, "loss": 0.5284, "step": 5955, "task_loss": 1.0049339532852173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3877018094062805, "epoch": 5.03, "learning_rate": 1.4038159642555248e-05, "loss": 0.3698, "step": 5956, "task_loss": 0.5924592018127441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2178558111190796, "epoch": 5.04, "learning_rate": 1.4032121724429417e-05, "loss": 0.3679, "step": 5957, "task_loss": 0.24024835228919983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7152705192565918, "epoch": 5.04, "learning_rate": 1.4026083806303586e-05, "loss": 0.587, "step": 5958, "task_loss": 0.7356069087982178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.56902015209198, "epoch": 5.04, "learning_rate": 1.4020045888177757e-05, "loss": 0.4732, "step": 5959, "task_loss": 1.5463863611221313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3452136218547821, "epoch": 5.04, "learning_rate": 1.4014007970051926e-05, "loss": 0.4203, "step": 5960, "task_loss": 0.19633322954177856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3555434048175812, "epoch": 5.04, "learning_rate": 1.4007970051926098e-05, "loss": 0.5184, "step": 5961, "task_loss": 0.227996826171875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4558466076850891, "epoch": 5.04, "learning_rate": 1.4001932133800267e-05, "loss": 0.4376, "step": 5962, "task_loss": 0.6436222791671753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49706026911735535, "epoch": 5.04, "learning_rate": 1.3995894215674436e-05, "loss": 0.5675, "step": 5963, "task_loss": 1.4766284227371216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3784533441066742, "epoch": 5.04, "learning_rate": 1.3989856297548606e-05, "loss": 0.4673, "step": 5964, "task_loss": 0.5140158534049988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4315754473209381, "epoch": 5.04, "learning_rate": 1.3983818379422775e-05, "loss": 0.434, "step": 5965, "task_loss": 1.3529188632965088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5177199840545654, "epoch": 5.04, "learning_rate": 1.3977780461296947e-05, "loss": 0.5322, "step": 5966, "task_loss": 1.0734010934829712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3252885639667511, "epoch": 5.04, "learning_rate": 1.3971742543171114e-05, "loss": 0.449, "step": 5967, "task_loss": 0.7452465891838074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6205657720565796, "epoch": 5.04, "learning_rate": 1.3965704625045283e-05, "loss": 0.5014, "step": 5968, "task_loss": 0.6555680632591248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7925713062286377, "epoch": 5.05, "learning_rate": 1.3959666706919456e-05, "loss": 0.6667, "step": 5969, "task_loss": 2.157383680343628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3912781774997711, "epoch": 5.05, "learning_rate": 1.3953628788793624e-05, "loss": 0.6124, "step": 5970, "task_loss": 0.21668975055217743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.599994421005249, "epoch": 5.05, "learning_rate": 1.3947590870667795e-05, "loss": 0.4523, "step": 5971, "task_loss": 1.3879796266555786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40146079659461975, "epoch": 5.05, "learning_rate": 1.3941552952541964e-05, "loss": 0.6216, "step": 5972, "task_loss": 0.8643374443054199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2691670060157776, "epoch": 5.05, "learning_rate": 1.3935515034416133e-05, "loss": 0.3521, "step": 5973, "task_loss": 0.520159125328064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4834488332271576, "epoch": 5.05, "learning_rate": 1.3929477116290305e-05, "loss": 0.5006, "step": 5974, "task_loss": 0.6700630187988281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4914935827255249, "epoch": 5.05, "learning_rate": 1.3923439198164472e-05, "loss": 0.3669, "step": 5975, "task_loss": 0.5475313663482666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6089412569999695, "epoch": 5.05, "learning_rate": 1.3917401280038645e-05, "loss": 0.5273, "step": 5976, "task_loss": 1.2933344841003418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5726367235183716, "epoch": 5.05, "learning_rate": 1.3911363361912813e-05, "loss": 0.5114, "step": 5977, "task_loss": 0.4062042534351349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.372598797082901, "epoch": 5.05, "learning_rate": 1.3905325443786982e-05, "loss": 0.4542, "step": 5978, "task_loss": 0.28348684310913086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46051955223083496, "epoch": 5.05, "learning_rate": 1.3899287525661153e-05, "loss": 0.3955, "step": 5979, "task_loss": 0.285810649394989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2923417091369629, "epoch": 5.05, "learning_rate": 1.3893249607535322e-05, "loss": 0.4758, "step": 5980, "task_loss": 0.1647213101387024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3080926537513733, "epoch": 5.06, "learning_rate": 1.3887211689409494e-05, "loss": 0.465, "step": 5981, "task_loss": 1.0512843132019043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49598783254623413, "epoch": 5.06, "learning_rate": 1.3881173771283663e-05, "loss": 0.5469, "step": 5982, "task_loss": 1.1917130947113037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5528882145881653, "epoch": 5.06, "learning_rate": 1.387513585315783e-05, "loss": 0.4241, "step": 5983, "task_loss": 0.2213740348815918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8582708835601807, "epoch": 5.06, "learning_rate": 1.3869097935032002e-05, "loss": 0.6538, "step": 5984, "task_loss": 1.4047415256500244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5671178102493286, "epoch": 5.06, "learning_rate": 1.3863060016906171e-05, "loss": 0.4732, "step": 5985, "task_loss": 1.1533161401748657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4082861840724945, "epoch": 5.06, "learning_rate": 1.3857022098780342e-05, "loss": 0.4781, "step": 5986, "task_loss": 0.7760971188545227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6113453507423401, "epoch": 5.06, "learning_rate": 1.385098418065451e-05, "loss": 0.5021, "step": 5987, "task_loss": 2.1331605911254883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40066906809806824, "epoch": 5.06, "learning_rate": 1.384494626252868e-05, "loss": 0.5455, "step": 5988, "task_loss": 0.3311357796192169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34928593039512634, "epoch": 5.06, "learning_rate": 1.3838908344402852e-05, "loss": 0.384, "step": 5989, "task_loss": 1.1157867908477783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44127339124679565, "epoch": 5.06, "learning_rate": 1.3832870426277019e-05, "loss": 0.4786, "step": 5990, "task_loss": 0.5373550653457642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3964579701423645, "epoch": 5.06, "learning_rate": 1.3826832508151191e-05, "loss": 0.5821, "step": 5991, "task_loss": 1.3341269493103027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23927077651023865, "epoch": 5.07, "learning_rate": 1.382079459002536e-05, "loss": 0.3745, "step": 5992, "task_loss": 0.14874033629894257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4649468958377838, "epoch": 5.07, "learning_rate": 1.3814756671899529e-05, "loss": 0.5224, "step": 5993, "task_loss": 1.2135146856307983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8507091403007507, "epoch": 5.07, "learning_rate": 1.38087187537737e-05, "loss": 0.4885, "step": 5994, "task_loss": 1.1142359972000122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41716885566711426, "epoch": 5.07, "learning_rate": 1.3802680835647868e-05, "loss": 0.5136, "step": 5995, "task_loss": 0.8956722021102905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3436892032623291, "epoch": 5.07, "learning_rate": 1.379664291752204e-05, "loss": 0.5156, "step": 5996, "task_loss": 0.9041117429733276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29305529594421387, "epoch": 5.07, "learning_rate": 1.379060499939621e-05, "loss": 0.3566, "step": 5997, "task_loss": 0.10946956276893616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.19880545139312744, "epoch": 5.07, "learning_rate": 1.3784567081270377e-05, "loss": 0.3587, "step": 5998, "task_loss": 0.05080127343535423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3904602527618408, "epoch": 5.07, "learning_rate": 1.3778529163144549e-05, "loss": 0.3601, "step": 5999, "task_loss": 0.6094356775283813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6640608310699463, "epoch": 5.07, "learning_rate": 1.3772491245018718e-05, "loss": 0.5787, "step": 6000, "task_loss": 0.18646833300590515 }, { "epoch": 5.07, "eval_accuracy": 0.9078415841584159, "eval_loss": 0.30195334553718567, "eval_runtime": 226.4735, "eval_samples_per_second": 111.492, "eval_steps_per_second": 0.874, "step": 6000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7259561419487, "epoch": 5.07, "learning_rate": 1.3766453326892888e-05, "loss": 0.4455, "step": 6001, "task_loss": 0.8431296944618225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3298564553260803, "epoch": 5.07, "learning_rate": 1.3760415408767057e-05, "loss": 0.5435, "step": 6002, "task_loss": 0.996370255947113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3912476599216461, "epoch": 5.07, "learning_rate": 1.3754377490641226e-05, "loss": 0.4072, "step": 6003, "task_loss": 0.06636767089366913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47756001353263855, "epoch": 5.08, "learning_rate": 1.3748339572515398e-05, "loss": 0.4109, "step": 6004, "task_loss": 0.6571682691574097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4718065559864044, "epoch": 5.08, "learning_rate": 1.3742301654389567e-05, "loss": 0.392, "step": 6005, "task_loss": 1.1199108362197876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40528351068496704, "epoch": 5.08, "learning_rate": 1.3736263736263738e-05, "loss": 0.3548, "step": 6006, "task_loss": 0.42825576663017273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3780808448791504, "epoch": 5.08, "learning_rate": 1.3730225818137907e-05, "loss": 0.3659, "step": 6007, "task_loss": 1.0530563592910767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39639055728912354, "epoch": 5.08, "learning_rate": 1.3724187900012076e-05, "loss": 0.4787, "step": 6008, "task_loss": 0.4536390006542206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4206335246562958, "epoch": 5.08, "learning_rate": 1.3718149981886246e-05, "loss": 0.3619, "step": 6009, "task_loss": 1.1850595474243164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29675641655921936, "epoch": 5.08, "learning_rate": 1.3712112063760415e-05, "loss": 0.5426, "step": 6010, "task_loss": 0.16049526631832123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5407566428184509, "epoch": 5.08, "learning_rate": 1.3706074145634587e-05, "loss": 0.4744, "step": 6011, "task_loss": 0.933721125125885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.416929692029953, "epoch": 5.08, "learning_rate": 1.3700036227508756e-05, "loss": 0.5826, "step": 6012, "task_loss": 0.26832908391952515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5752192735671997, "epoch": 5.08, "learning_rate": 1.3693998309382925e-05, "loss": 0.432, "step": 6013, "task_loss": 1.1222081184387207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3737102150917053, "epoch": 5.08, "learning_rate": 1.3687960391257096e-05, "loss": 0.39, "step": 6014, "task_loss": 0.08444869518280029 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29432401061058044, "epoch": 5.08, "learning_rate": 1.3681922473131265e-05, "loss": 0.3323, "step": 6015, "task_loss": 0.2860241234302521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6316185593605042, "epoch": 5.09, "learning_rate": 1.3675884555005435e-05, "loss": 0.3914, "step": 6016, "task_loss": 0.6852641105651855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5086910724639893, "epoch": 5.09, "learning_rate": 1.3669846636879604e-05, "loss": 0.5866, "step": 6017, "task_loss": 0.3768693804740906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4156798720359802, "epoch": 5.09, "learning_rate": 1.3663808718753773e-05, "loss": 0.4659, "step": 6018, "task_loss": 0.809343159198761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46982911229133606, "epoch": 5.09, "learning_rate": 1.3657770800627945e-05, "loss": 0.5185, "step": 6019, "task_loss": 0.10722782462835312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4374825954437256, "epoch": 5.09, "learning_rate": 1.3651732882502114e-05, "loss": 0.4828, "step": 6020, "task_loss": 0.6165213584899902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.727118730545044, "epoch": 5.09, "learning_rate": 1.3645694964376285e-05, "loss": 0.6018, "step": 6021, "task_loss": 1.128334641456604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30963483452796936, "epoch": 5.09, "learning_rate": 1.3639657046250454e-05, "loss": 0.4595, "step": 6022, "task_loss": 0.8902470469474792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5258951187133789, "epoch": 5.09, "learning_rate": 1.3633619128124622e-05, "loss": 0.5562, "step": 6023, "task_loss": 0.41981035470962524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7069607973098755, "epoch": 5.09, "learning_rate": 1.3627581209998793e-05, "loss": 0.5191, "step": 6024, "task_loss": 0.2582801580429077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6058889627456665, "epoch": 5.09, "learning_rate": 1.3621543291872962e-05, "loss": 0.465, "step": 6025, "task_loss": 0.5562323927879333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39247676730155945, "epoch": 5.09, "learning_rate": 1.3615505373747134e-05, "loss": 0.3144, "step": 6026, "task_loss": 0.25659263134002686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4586787223815918, "epoch": 5.09, "learning_rate": 1.3609467455621303e-05, "loss": 0.5015, "step": 6027, "task_loss": 0.20159313082695007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4671986699104309, "epoch": 5.1, "learning_rate": 1.3603429537495472e-05, "loss": 0.4835, "step": 6028, "task_loss": 1.1619223356246948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26620790362358093, "epoch": 5.1, "learning_rate": 1.3597391619369642e-05, "loss": 0.598, "step": 6029, "task_loss": 0.405183345079422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5016902685165405, "epoch": 5.1, "learning_rate": 1.3591353701243811e-05, "loss": 0.4632, "step": 6030, "task_loss": 1.0069116353988647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2515528202056885, "epoch": 5.1, "learning_rate": 1.3585315783117984e-05, "loss": 0.3232, "step": 6031, "task_loss": 0.22121688723564148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43135589361190796, "epoch": 5.1, "learning_rate": 1.357927786499215e-05, "loss": 0.4431, "step": 6032, "task_loss": 0.4904557764530182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5272772908210754, "epoch": 5.1, "learning_rate": 1.357323994686632e-05, "loss": 0.5532, "step": 6033, "task_loss": 1.227022409439087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3377170264720917, "epoch": 5.1, "learning_rate": 1.3567202028740492e-05, "loss": 0.3893, "step": 6034, "task_loss": 0.10974021255970001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5595309734344482, "epoch": 5.1, "learning_rate": 1.356116411061466e-05, "loss": 0.4964, "step": 6035, "task_loss": 0.7340883612632751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48726242780685425, "epoch": 5.1, "learning_rate": 1.3555126192488831e-05, "loss": 0.5724, "step": 6036, "task_loss": 0.26538988947868347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6428620219230652, "epoch": 5.1, "learning_rate": 1.3549088274363e-05, "loss": 0.4895, "step": 6037, "task_loss": 0.5712012648582458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40082937479019165, "epoch": 5.1, "learning_rate": 1.3543050356237169e-05, "loss": 0.5232, "step": 6038, "task_loss": 0.58014976978302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28909146785736084, "epoch": 5.1, "learning_rate": 1.3537012438111341e-05, "loss": 0.4127, "step": 6039, "task_loss": 0.4486733078956604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31982994079589844, "epoch": 5.11, "learning_rate": 1.3530974519985509e-05, "loss": 0.331, "step": 6040, "task_loss": 0.4047614336013794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3360825479030609, "epoch": 5.11, "learning_rate": 1.352493660185968e-05, "loss": 0.5412, "step": 6041, "task_loss": 0.18505176901817322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7931953072547913, "epoch": 5.11, "learning_rate": 1.351889868373385e-05, "loss": 0.5883, "step": 6042, "task_loss": 0.7573176622390747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45055651664733887, "epoch": 5.11, "learning_rate": 1.3512860765608019e-05, "loss": 0.4752, "step": 6043, "task_loss": 0.8979575634002686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.263220876455307, "epoch": 5.11, "learning_rate": 1.350682284748219e-05, "loss": 0.4862, "step": 6044, "task_loss": 1.0195878744125366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3075276017189026, "epoch": 5.11, "learning_rate": 1.3500784929356358e-05, "loss": 0.5362, "step": 6045, "task_loss": 0.7288013100624084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.361200749874115, "epoch": 5.11, "learning_rate": 1.349474701123053e-05, "loss": 0.3927, "step": 6046, "task_loss": 1.2523176670074463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.666648268699646, "epoch": 5.11, "learning_rate": 1.3488709093104697e-05, "loss": 0.4789, "step": 6047, "task_loss": 0.1753956377506256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2796754240989685, "epoch": 5.11, "learning_rate": 1.3482671174978866e-05, "loss": 0.3953, "step": 6048, "task_loss": 0.13282561302185059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46063441038131714, "epoch": 5.11, "learning_rate": 1.3476633256853039e-05, "loss": 0.4075, "step": 6049, "task_loss": 0.5712062120437622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4028043746948242, "epoch": 5.11, "learning_rate": 1.3470595338727207e-05, "loss": 0.4733, "step": 6050, "task_loss": 1.1346734762191772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4233834743499756, "epoch": 5.11, "learning_rate": 1.3464557420601378e-05, "loss": 0.4924, "step": 6051, "task_loss": 1.0581886768341064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28586089611053467, "epoch": 5.12, "learning_rate": 1.3458519502475547e-05, "loss": 0.4679, "step": 6052, "task_loss": 0.4870016574859619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3542870283126831, "epoch": 5.12, "learning_rate": 1.3452481584349716e-05, "loss": 0.4017, "step": 6053, "task_loss": 0.8707901239395142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4612223207950592, "epoch": 5.12, "learning_rate": 1.3446443666223888e-05, "loss": 0.4379, "step": 6054, "task_loss": 0.9345571994781494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4924331307411194, "epoch": 5.12, "learning_rate": 1.3440405748098055e-05, "loss": 0.4681, "step": 6055, "task_loss": 1.4472228288650513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40319788455963135, "epoch": 5.12, "learning_rate": 1.3434367829972228e-05, "loss": 0.5131, "step": 6056, "task_loss": 0.36424267292022705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47285884618759155, "epoch": 5.12, "learning_rate": 1.3428329911846396e-05, "loss": 0.352, "step": 6057, "task_loss": 0.559246838092804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4122989773750305, "epoch": 5.12, "learning_rate": 1.3422291993720565e-05, "loss": 0.5335, "step": 6058, "task_loss": 0.477403461933136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42810606956481934, "epoch": 5.12, "learning_rate": 1.3416254075594736e-05, "loss": 0.4809, "step": 6059, "task_loss": 0.62047278881073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30597326159477234, "epoch": 5.12, "learning_rate": 1.3410216157468905e-05, "loss": 0.3824, "step": 6060, "task_loss": 0.16076746582984924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6116683483123779, "epoch": 5.12, "learning_rate": 1.3404178239343077e-05, "loss": 0.6654, "step": 6061, "task_loss": 2.1852352619171143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2726067900657654, "epoch": 5.12, "learning_rate": 1.3398140321217246e-05, "loss": 0.3771, "step": 6062, "task_loss": 0.23471403121948242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3169618248939514, "epoch": 5.13, "learning_rate": 1.3392102403091413e-05, "loss": 0.3747, "step": 6063, "task_loss": 0.10524037480354309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7403062582015991, "epoch": 5.13, "learning_rate": 1.3386064484965585e-05, "loss": 0.4934, "step": 6064, "task_loss": 0.8136484622955322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5489674806594849, "epoch": 5.13, "learning_rate": 1.3380026566839754e-05, "loss": 0.4284, "step": 6065, "task_loss": 0.5061486959457397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38511645793914795, "epoch": 5.13, "learning_rate": 1.3373988648713925e-05, "loss": 0.4308, "step": 6066, "task_loss": 0.4862314462661743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3233916759490967, "epoch": 5.13, "learning_rate": 1.3367950730588094e-05, "loss": 0.5189, "step": 6067, "task_loss": 0.5849013328552246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4537801742553711, "epoch": 5.13, "learning_rate": 1.3361912812462263e-05, "loss": 0.6275, "step": 6068, "task_loss": 0.7423861026763916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1965762972831726, "epoch": 5.13, "learning_rate": 1.3355874894336435e-05, "loss": 0.4594, "step": 6069, "task_loss": 0.18746179342269897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40123844146728516, "epoch": 5.13, "learning_rate": 1.3349836976210604e-05, "loss": 0.5237, "step": 6070, "task_loss": 0.7818856835365295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6137521266937256, "epoch": 5.13, "learning_rate": 1.3343799058084774e-05, "loss": 0.5028, "step": 6071, "task_loss": 1.7472507953643799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5355037450790405, "epoch": 5.13, "learning_rate": 1.3337761139958943e-05, "loss": 0.5622, "step": 6072, "task_loss": 1.208091378211975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32960784435272217, "epoch": 5.13, "learning_rate": 1.3331723221833112e-05, "loss": 0.4141, "step": 6073, "task_loss": 0.3296586275100708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.446250855922699, "epoch": 5.13, "learning_rate": 1.3325685303707283e-05, "loss": 0.5339, "step": 6074, "task_loss": 0.9995641708374023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6211857199668884, "epoch": 5.14, "learning_rate": 1.3319647385581451e-05, "loss": 0.5358, "step": 6075, "task_loss": 0.7238313555717468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24836868047714233, "epoch": 5.14, "learning_rate": 1.3313609467455624e-05, "loss": 0.4539, "step": 6076, "task_loss": 0.028985394164919853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7105672359466553, "epoch": 5.14, "learning_rate": 1.3307571549329793e-05, "loss": 0.5364, "step": 6077, "task_loss": 1.4642765522003174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29775524139404297, "epoch": 5.14, "learning_rate": 1.3301533631203961e-05, "loss": 0.4736, "step": 6078, "task_loss": 1.415164589881897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42882752418518066, "epoch": 5.14, "learning_rate": 1.3295495713078132e-05, "loss": 0.4568, "step": 6079, "task_loss": 0.4393989145755768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28087925910949707, "epoch": 5.14, "learning_rate": 1.3289457794952301e-05, "loss": 0.4659, "step": 6080, "task_loss": 0.3817955553531647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3529781103134155, "epoch": 5.14, "learning_rate": 1.328341987682647e-05, "loss": 0.4628, "step": 6081, "task_loss": 0.14105166494846344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2521512508392334, "epoch": 5.14, "learning_rate": 1.327738195870064e-05, "loss": 0.4198, "step": 6082, "task_loss": 0.8012117743492126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26989591121673584, "epoch": 5.14, "learning_rate": 1.327134404057481e-05, "loss": 0.5553, "step": 6083, "task_loss": 0.8267058730125427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3994668424129486, "epoch": 5.14, "learning_rate": 1.3265306122448982e-05, "loss": 0.4681, "step": 6084, "task_loss": 0.7029871940612793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2228318452835083, "epoch": 5.14, "learning_rate": 1.325926820432315e-05, "loss": 0.56, "step": 6085, "task_loss": 0.6692995429039001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47748300433158875, "epoch": 5.14, "learning_rate": 1.325323028619732e-05, "loss": 0.5437, "step": 6086, "task_loss": 1.038307547569275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7062476277351379, "epoch": 5.15, "learning_rate": 1.324719236807149e-05, "loss": 0.5343, "step": 6087, "task_loss": 0.4333077371120453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.517061173915863, "epoch": 5.15, "learning_rate": 1.3241154449945659e-05, "loss": 0.5794, "step": 6088, "task_loss": 0.7897264957427979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48791441321372986, "epoch": 5.15, "learning_rate": 1.323511653181983e-05, "loss": 0.4022, "step": 6089, "task_loss": 0.5231915712356567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2645205557346344, "epoch": 5.15, "learning_rate": 1.3229078613693998e-05, "loss": 0.4335, "step": 6090, "task_loss": 0.1047622561454773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40866953134536743, "epoch": 5.15, "learning_rate": 1.3223040695568167e-05, "loss": 0.424, "step": 6091, "task_loss": 0.47545912861824036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.516728401184082, "epoch": 5.15, "learning_rate": 1.321700277744234e-05, "loss": 0.452, "step": 6092, "task_loss": 0.6408304572105408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4165635108947754, "epoch": 5.15, "learning_rate": 1.3210964859316508e-05, "loss": 0.4679, "step": 6093, "task_loss": 0.5097827911376953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34020093083381653, "epoch": 5.15, "learning_rate": 1.3204926941190679e-05, "loss": 0.4782, "step": 6094, "task_loss": 0.34038764238357544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35751307010650635, "epoch": 5.15, "learning_rate": 1.3198889023064848e-05, "loss": 0.4585, "step": 6095, "task_loss": 0.8555724024772644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.754215657711029, "epoch": 5.15, "learning_rate": 1.3192851104939017e-05, "loss": 0.6339, "step": 6096, "task_loss": 1.6852246522903442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3186350464820862, "epoch": 5.15, "learning_rate": 1.3186813186813187e-05, "loss": 0.3725, "step": 6097, "task_loss": 1.0746930837631226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5849156975746155, "epoch": 5.15, "learning_rate": 1.3180775268687356e-05, "loss": 0.6488, "step": 6098, "task_loss": 1.3260470628738403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42752474546432495, "epoch": 5.16, "learning_rate": 1.3174737350561528e-05, "loss": 0.5524, "step": 6099, "task_loss": 0.8044757843017578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2714477777481079, "epoch": 5.16, "learning_rate": 1.3168699432435697e-05, "loss": 0.467, "step": 6100, "task_loss": 0.45615917444229126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5291986465454102, "epoch": 5.16, "learning_rate": 1.3162661514309866e-05, "loss": 0.6256, "step": 6101, "task_loss": 2.1692802906036377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5348755717277527, "epoch": 5.16, "learning_rate": 1.3156623596184037e-05, "loss": 0.5042, "step": 6102, "task_loss": 0.2975325286388397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8818687200546265, "epoch": 5.16, "learning_rate": 1.3150585678058205e-05, "loss": 0.5393, "step": 6103, "task_loss": 1.2022050619125366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.442829966545105, "epoch": 5.16, "learning_rate": 1.3144547759932378e-05, "loss": 0.3998, "step": 6104, "task_loss": 0.4400947093963623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.460046648979187, "epoch": 5.16, "learning_rate": 1.3138509841806545e-05, "loss": 0.3714, "step": 6105, "task_loss": 0.2084684669971466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5473044514656067, "epoch": 5.16, "learning_rate": 1.3132471923680714e-05, "loss": 0.4591, "step": 6106, "task_loss": 0.363830029964447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.374272882938385, "epoch": 5.16, "learning_rate": 1.3126434005554886e-05, "loss": 0.4842, "step": 6107, "task_loss": 0.4404298663139343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29356062412261963, "epoch": 5.16, "learning_rate": 1.3120396087429055e-05, "loss": 0.5056, "step": 6108, "task_loss": 0.3627863824367523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4361107349395752, "epoch": 5.16, "learning_rate": 1.3114358169303225e-05, "loss": 0.5519, "step": 6109, "task_loss": 1.5569052696228027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4370635747909546, "epoch": 5.16, "learning_rate": 1.3108320251177394e-05, "loss": 0.3546, "step": 6110, "task_loss": 1.0480105876922607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4578360915184021, "epoch": 5.17, "learning_rate": 1.3102282333051563e-05, "loss": 0.4592, "step": 6111, "task_loss": 0.5150473713874817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5022355318069458, "epoch": 5.17, "learning_rate": 1.3096244414925734e-05, "loss": 0.5014, "step": 6112, "task_loss": 0.7386837601661682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35836079716682434, "epoch": 5.17, "learning_rate": 1.3090206496799903e-05, "loss": 0.5635, "step": 6113, "task_loss": 0.46500322222709656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45011839270591736, "epoch": 5.17, "learning_rate": 1.3084168578674075e-05, "loss": 0.5577, "step": 6114, "task_loss": 0.7670868039131165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39528197050094604, "epoch": 5.17, "learning_rate": 1.3078130660548244e-05, "loss": 0.4614, "step": 6115, "task_loss": 0.14794820547103882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5256476402282715, "epoch": 5.17, "learning_rate": 1.3072092742422413e-05, "loss": 0.4468, "step": 6116, "task_loss": 0.8338684439659119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31768783926963806, "epoch": 5.17, "learning_rate": 1.3066054824296583e-05, "loss": 0.4017, "step": 6117, "task_loss": 0.35649555921554565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4154629111289978, "epoch": 5.17, "learning_rate": 1.3060016906170752e-05, "loss": 0.4602, "step": 6118, "task_loss": 0.9593022465705872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.100689172744751, "epoch": 5.17, "learning_rate": 1.3053978988044924e-05, "loss": 0.6215, "step": 6119, "task_loss": 0.8713032603263855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25431886315345764, "epoch": 5.17, "learning_rate": 1.3047941069919092e-05, "loss": 0.382, "step": 6120, "task_loss": 0.19224536418914795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29706811904907227, "epoch": 5.17, "learning_rate": 1.304190315179326e-05, "loss": 0.4793, "step": 6121, "task_loss": 1.097525715827942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4011467397212982, "epoch": 5.17, "learning_rate": 1.3035865233667433e-05, "loss": 0.5848, "step": 6122, "task_loss": 0.6773102283477783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6201338768005371, "epoch": 5.18, "learning_rate": 1.3029827315541602e-05, "loss": 0.4676, "step": 6123, "task_loss": 0.5619208812713623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.574016809463501, "epoch": 5.18, "learning_rate": 1.3023789397415772e-05, "loss": 0.4973, "step": 6124, "task_loss": 0.7853434681892395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49159732460975647, "epoch": 5.18, "learning_rate": 1.3017751479289941e-05, "loss": 0.4246, "step": 6125, "task_loss": 0.8851189613342285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7109062671661377, "epoch": 5.18, "learning_rate": 1.301171356116411e-05, "loss": 0.5848, "step": 6126, "task_loss": 0.7335724830627441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33137527108192444, "epoch": 5.18, "learning_rate": 1.3005675643038282e-05, "loss": 0.3636, "step": 6127, "task_loss": 0.07704176008701324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7831236124038696, "epoch": 5.18, "learning_rate": 1.299963772491245e-05, "loss": 0.4737, "step": 6128, "task_loss": 0.3302163779735565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5318394303321838, "epoch": 5.18, "learning_rate": 1.2993599806786622e-05, "loss": 0.4525, "step": 6129, "task_loss": 0.9210467338562012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3585624098777771, "epoch": 5.18, "learning_rate": 1.298756188866079e-05, "loss": 0.4353, "step": 6130, "task_loss": 0.7049155831336975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37324902415275574, "epoch": 5.18, "learning_rate": 1.298152397053496e-05, "loss": 0.4852, "step": 6131, "task_loss": 1.2507636547088623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49834734201431274, "epoch": 5.18, "learning_rate": 1.297548605240913e-05, "loss": 0.4704, "step": 6132, "task_loss": 0.9661620855331421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3737214207649231, "epoch": 5.18, "learning_rate": 1.2969448134283299e-05, "loss": 0.4332, "step": 6133, "task_loss": 0.39389070868492126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6448582410812378, "epoch": 5.19, "learning_rate": 1.2963410216157471e-05, "loss": 0.5645, "step": 6134, "task_loss": 0.9474178552627563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5322898626327515, "epoch": 5.19, "learning_rate": 1.295737229803164e-05, "loss": 0.4236, "step": 6135, "task_loss": 0.6148312091827393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.378815621137619, "epoch": 5.19, "learning_rate": 1.2951334379905807e-05, "loss": 0.395, "step": 6136, "task_loss": 0.9980775713920593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6200602650642395, "epoch": 5.19, "learning_rate": 1.294529646177998e-05, "loss": 0.4538, "step": 6137, "task_loss": 1.2150746583938599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38590967655181885, "epoch": 5.19, "learning_rate": 1.2939258543654148e-05, "loss": 0.442, "step": 6138, "task_loss": 0.639130175113678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3706420063972473, "epoch": 5.19, "learning_rate": 1.2933220625528319e-05, "loss": 0.3614, "step": 6139, "task_loss": 0.08359649777412415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47983962297439575, "epoch": 5.19, "learning_rate": 1.2927182707402488e-05, "loss": 0.3774, "step": 6140, "task_loss": 0.1693434864282608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5580073595046997, "epoch": 5.19, "learning_rate": 1.2921144789276657e-05, "loss": 0.4991, "step": 6141, "task_loss": 0.9490946531295776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36639636754989624, "epoch": 5.19, "learning_rate": 1.2915106871150829e-05, "loss": 0.4909, "step": 6142, "task_loss": 0.3023679256439209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3850104510784149, "epoch": 5.19, "learning_rate": 1.2909068953024998e-05, "loss": 0.5837, "step": 6143, "task_loss": 0.10401973128318787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4688534736633301, "epoch": 5.19, "learning_rate": 1.2903031034899168e-05, "loss": 0.4782, "step": 6144, "task_loss": 0.6819477677345276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4195156693458557, "epoch": 5.19, "learning_rate": 1.2896993116773337e-05, "loss": 0.5362, "step": 6145, "task_loss": 0.8169540762901306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24895665049552917, "epoch": 5.2, "learning_rate": 1.2890955198647506e-05, "loss": 0.3616, "step": 6146, "task_loss": 0.8087191581726074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37978583574295044, "epoch": 5.2, "learning_rate": 1.2884917280521677e-05, "loss": 0.464, "step": 6147, "task_loss": 0.2558240294456482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2798442840576172, "epoch": 5.2, "learning_rate": 1.2878879362395846e-05, "loss": 0.3981, "step": 6148, "task_loss": 0.7067782878875732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4613914489746094, "epoch": 5.2, "learning_rate": 1.2872841444270018e-05, "loss": 0.3899, "step": 6149, "task_loss": 0.5899757146835327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2541161775588989, "epoch": 5.2, "learning_rate": 1.2866803526144187e-05, "loss": 0.4244, "step": 6150, "task_loss": 0.5561696290969849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2999497652053833, "epoch": 5.2, "learning_rate": 1.2860765608018356e-05, "loss": 0.3665, "step": 6151, "task_loss": 0.3899548649787903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2719145715236664, "epoch": 5.2, "learning_rate": 1.2854727689892526e-05, "loss": 0.4217, "step": 6152, "task_loss": 0.22736620903015137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4711979925632477, "epoch": 5.2, "learning_rate": 1.2848689771766695e-05, "loss": 0.4927, "step": 6153, "task_loss": 0.9362608790397644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2930052876472473, "epoch": 5.2, "learning_rate": 1.2842651853640866e-05, "loss": 0.547, "step": 6154, "task_loss": 0.4726117253303528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6689135432243347, "epoch": 5.2, "learning_rate": 1.2836613935515034e-05, "loss": 0.4096, "step": 6155, "task_loss": 0.9032557010650635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.424170583486557, "epoch": 5.2, "learning_rate": 1.2830576017389203e-05, "loss": 0.5204, "step": 6156, "task_loss": 1.4358208179473877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.633333146572113, "epoch": 5.2, "learning_rate": 1.2824538099263376e-05, "loss": 0.5292, "step": 6157, "task_loss": 1.0741466283798218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37188035249710083, "epoch": 5.21, "learning_rate": 1.2818500181137544e-05, "loss": 0.3608, "step": 6158, "task_loss": 0.919540286064148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40353313088417053, "epoch": 5.21, "learning_rate": 1.2812462263011715e-05, "loss": 0.4106, "step": 6159, "task_loss": 0.6638248562812805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32889360189437866, "epoch": 5.21, "learning_rate": 1.2806424344885884e-05, "loss": 0.3727, "step": 6160, "task_loss": 0.14907263219356537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44253841042518616, "epoch": 5.21, "learning_rate": 1.2800386426760053e-05, "loss": 0.5595, "step": 6161, "task_loss": 0.4107782542705536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5456867814064026, "epoch": 5.21, "learning_rate": 1.2794348508634223e-05, "loss": 0.5208, "step": 6162, "task_loss": 1.5941705703735352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.493611216545105, "epoch": 5.21, "learning_rate": 1.2788310590508392e-05, "loss": 0.5042, "step": 6163, "task_loss": 1.0247541666030884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3051633834838867, "epoch": 5.21, "learning_rate": 1.2782272672382565e-05, "loss": 0.3439, "step": 6164, "task_loss": 0.2997063398361206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2845070958137512, "epoch": 5.21, "learning_rate": 1.2776234754256733e-05, "loss": 0.4036, "step": 6165, "task_loss": 0.674467146396637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38586676120758057, "epoch": 5.21, "learning_rate": 1.2770196836130902e-05, "loss": 0.3496, "step": 6166, "task_loss": 0.29818442463874817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45291945338249207, "epoch": 5.21, "learning_rate": 1.2764158918005073e-05, "loss": 0.6866, "step": 6167, "task_loss": 0.47581860423088074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5666059851646423, "epoch": 5.21, "learning_rate": 1.2758120999879242e-05, "loss": 0.5422, "step": 6168, "task_loss": 0.47892069816589355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2942771911621094, "epoch": 5.21, "learning_rate": 1.2752083081753414e-05, "loss": 0.4333, "step": 6169, "task_loss": 0.787386417388916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40701723098754883, "epoch": 5.22, "learning_rate": 1.2746045163627581e-05, "loss": 0.5495, "step": 6170, "task_loss": 0.9545518159866333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4142760932445526, "epoch": 5.22, "learning_rate": 1.274000724550175e-05, "loss": 0.4105, "step": 6171, "task_loss": 1.2535786628723145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4161740243434906, "epoch": 5.22, "learning_rate": 1.2733969327375922e-05, "loss": 0.4184, "step": 6172, "task_loss": 0.6351439952850342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4108370542526245, "epoch": 5.22, "learning_rate": 1.2727931409250091e-05, "loss": 0.5608, "step": 6173, "task_loss": 0.3379417955875397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3723337650299072, "epoch": 5.22, "learning_rate": 1.2721893491124262e-05, "loss": 0.3661, "step": 6174, "task_loss": 0.4654655456542969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38027501106262207, "epoch": 5.22, "learning_rate": 1.271585557299843e-05, "loss": 0.5391, "step": 6175, "task_loss": 0.8787911534309387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33820420503616333, "epoch": 5.22, "learning_rate": 1.27098176548726e-05, "loss": 0.53, "step": 6176, "task_loss": 1.0738377571105957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2917509377002716, "epoch": 5.22, "learning_rate": 1.270377973674677e-05, "loss": 0.4206, "step": 6177, "task_loss": 0.7680242657661438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5530897378921509, "epoch": 5.22, "learning_rate": 1.2697741818620939e-05, "loss": 0.421, "step": 6178, "task_loss": 1.0556491613388062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5422264337539673, "epoch": 5.22, "learning_rate": 1.2691703900495111e-05, "loss": 0.566, "step": 6179, "task_loss": 0.9877161383628845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39203545451164246, "epoch": 5.22, "learning_rate": 1.268566598236928e-05, "loss": 0.3573, "step": 6180, "task_loss": 0.2709059715270996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3794826567173004, "epoch": 5.22, "learning_rate": 1.2679628064243449e-05, "loss": 0.4887, "step": 6181, "task_loss": 0.9890578985214233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4647165536880493, "epoch": 5.23, "learning_rate": 1.267359014611762e-05, "loss": 0.4008, "step": 6182, "task_loss": 0.3363933563232422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5746468305587769, "epoch": 5.23, "learning_rate": 1.2667552227991788e-05, "loss": 0.5554, "step": 6183, "task_loss": 1.390650987625122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.502027690410614, "epoch": 5.23, "learning_rate": 1.266151430986596e-05, "loss": 0.5143, "step": 6184, "task_loss": 0.7353254556655884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3463137447834015, "epoch": 5.23, "learning_rate": 1.2655476391740128e-05, "loss": 0.4681, "step": 6185, "task_loss": 1.13534414768219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44857269525527954, "epoch": 5.23, "learning_rate": 1.2649438473614297e-05, "loss": 0.4778, "step": 6186, "task_loss": 0.6780846118927002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3229963183403015, "epoch": 5.23, "learning_rate": 1.2643400555488469e-05, "loss": 0.3875, "step": 6187, "task_loss": 0.7619109749794006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44008105993270874, "epoch": 5.23, "learning_rate": 1.2637362637362638e-05, "loss": 0.448, "step": 6188, "task_loss": 0.08472947776317596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2771660387516022, "epoch": 5.23, "learning_rate": 1.2631324719236808e-05, "loss": 0.4453, "step": 6189, "task_loss": 0.1630460023880005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4467581510543823, "epoch": 5.23, "learning_rate": 1.2625286801110977e-05, "loss": 0.491, "step": 6190, "task_loss": 0.2543722093105316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37369856238365173, "epoch": 5.23, "learning_rate": 1.2619248882985146e-05, "loss": 0.3938, "step": 6191, "task_loss": 0.5769704580307007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4213883876800537, "epoch": 5.23, "learning_rate": 1.2613210964859318e-05, "loss": 0.6675, "step": 6192, "task_loss": 0.6167872548103333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30494827032089233, "epoch": 5.23, "learning_rate": 1.2607173046733486e-05, "loss": 0.3618, "step": 6193, "task_loss": 0.6337176561355591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4974883198738098, "epoch": 5.24, "learning_rate": 1.2601135128607658e-05, "loss": 0.6062, "step": 6194, "task_loss": 0.8943815231323242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27975523471832275, "epoch": 5.24, "learning_rate": 1.2595097210481827e-05, "loss": 0.5089, "step": 6195, "task_loss": 0.20117653906345367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30887091159820557, "epoch": 5.24, "learning_rate": 1.2589059292355996e-05, "loss": 0.3348, "step": 6196, "task_loss": 0.378909170627594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35314202308654785, "epoch": 5.24, "learning_rate": 1.2583021374230166e-05, "loss": 0.4535, "step": 6197, "task_loss": 0.6679708361625671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47578901052474976, "epoch": 5.24, "learning_rate": 1.2576983456104335e-05, "loss": 0.4455, "step": 6198, "task_loss": 0.04629099369049072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40176236629486084, "epoch": 5.24, "learning_rate": 1.2570945537978507e-05, "loss": 0.4337, "step": 6199, "task_loss": 0.5134381055831909 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49800628423690796, "epoch": 5.24, "learning_rate": 1.2564907619852676e-05, "loss": 0.4757, "step": 6200, "task_loss": 0.21907542645931244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.20796847343444824, "epoch": 5.24, "learning_rate": 1.2558869701726843e-05, "loss": 0.3775, "step": 6201, "task_loss": 0.034181609749794006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5537691116333008, "epoch": 5.24, "learning_rate": 1.2552831783601016e-05, "loss": 0.5178, "step": 6202, "task_loss": 0.7014821767807007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33283016085624695, "epoch": 5.24, "learning_rate": 1.2546793865475185e-05, "loss": 0.4908, "step": 6203, "task_loss": 0.24621935188770294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4907032251358032, "epoch": 5.24, "learning_rate": 1.2540755947349355e-05, "loss": 0.5118, "step": 6204, "task_loss": 0.18759043514728546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.342742919921875, "epoch": 5.24, "learning_rate": 1.2534718029223524e-05, "loss": 0.4898, "step": 6205, "task_loss": 0.7270870208740234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4542190730571747, "epoch": 5.25, "learning_rate": 1.2528680111097693e-05, "loss": 0.4588, "step": 6206, "task_loss": 0.7517849206924438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.451686829328537, "epoch": 5.25, "learning_rate": 1.2522642192971865e-05, "loss": 0.4712, "step": 6207, "task_loss": 1.12918221950531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4083024859428406, "epoch": 5.25, "learning_rate": 1.2516604274846034e-05, "loss": 0.4374, "step": 6208, "task_loss": 0.0881195217370987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5862739086151123, "epoch": 5.25, "learning_rate": 1.2510566356720205e-05, "loss": 0.4768, "step": 6209, "task_loss": 0.39463233947753906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5894033312797546, "epoch": 5.25, "learning_rate": 1.2504528438594374e-05, "loss": 0.5225, "step": 6210, "task_loss": 0.5741052627563477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5622382164001465, "epoch": 5.25, "learning_rate": 1.2498490520468544e-05, "loss": 0.6311, "step": 6211, "task_loss": 0.6506113409996033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7736029624938965, "epoch": 5.25, "learning_rate": 1.2492452602342713e-05, "loss": 0.4799, "step": 6212, "task_loss": 0.6603603959083557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2468295395374298, "epoch": 5.25, "learning_rate": 1.2486414684216882e-05, "loss": 0.4483, "step": 6213, "task_loss": 0.14578548073768616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6984173059463501, "epoch": 5.25, "learning_rate": 1.2480376766091052e-05, "loss": 0.5554, "step": 6214, "task_loss": 1.0485152006149292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41591858863830566, "epoch": 5.25, "learning_rate": 1.2474338847965223e-05, "loss": 0.4275, "step": 6215, "task_loss": 1.4896053075790405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.602888822555542, "epoch": 5.25, "learning_rate": 1.2468300929839392e-05, "loss": 0.49, "step": 6216, "task_loss": 1.1430680751800537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31008094549179077, "epoch": 5.26, "learning_rate": 1.246226301171356e-05, "loss": 0.3567, "step": 6217, "task_loss": 0.8474169969558716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.13234040141105652, "epoch": 5.26, "learning_rate": 1.2456225093587731e-05, "loss": 0.3969, "step": 6218, "task_loss": 0.6200963258743286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2713158428668976, "epoch": 5.26, "learning_rate": 1.2450187175461902e-05, "loss": 0.3204, "step": 6219, "task_loss": 0.11687406152486801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48578497767448425, "epoch": 5.26, "learning_rate": 1.244414925733607e-05, "loss": 0.5868, "step": 6220, "task_loss": 0.9350218176841736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5125357508659363, "epoch": 5.26, "learning_rate": 1.2438111339210241e-05, "loss": 0.4309, "step": 6221, "task_loss": 0.16614003479480743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25036102533340454, "epoch": 5.26, "learning_rate": 1.243207342108441e-05, "loss": 0.3975, "step": 6222, "task_loss": 0.17539043724536896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3514832556247711, "epoch": 5.26, "learning_rate": 1.242603550295858e-05, "loss": 0.468, "step": 6223, "task_loss": 0.16145166754722595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5061763525009155, "epoch": 5.26, "learning_rate": 1.241999758483275e-05, "loss": 0.4297, "step": 6224, "task_loss": 0.1512872576713562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5372095108032227, "epoch": 5.26, "learning_rate": 1.241395966670692e-05, "loss": 0.5252, "step": 6225, "task_loss": 1.8842012882232666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3596974015235901, "epoch": 5.26, "learning_rate": 1.240792174858109e-05, "loss": 0.3819, "step": 6226, "task_loss": 0.687288224697113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4767927825450897, "epoch": 5.26, "learning_rate": 1.240188383045526e-05, "loss": 0.4024, "step": 6227, "task_loss": 0.7534205317497253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6467216610908508, "epoch": 5.26, "learning_rate": 1.2395845912329429e-05, "loss": 0.6204, "step": 6228, "task_loss": 1.6777626276016235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4914214611053467, "epoch": 5.27, "learning_rate": 1.2389807994203599e-05, "loss": 0.4628, "step": 6229, "task_loss": 0.7413581013679504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4860031008720398, "epoch": 5.27, "learning_rate": 1.238377007607777e-05, "loss": 0.4634, "step": 6230, "task_loss": 0.4979691505432129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28255903720855713, "epoch": 5.27, "learning_rate": 1.2377732157951939e-05, "loss": 0.4363, "step": 6231, "task_loss": 0.2801055610179901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.11712111532688141, "epoch": 5.27, "learning_rate": 1.2371694239826107e-05, "loss": 0.4164, "step": 6232, "task_loss": 0.02010384574532509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5305463075637817, "epoch": 5.27, "learning_rate": 1.2365656321700278e-05, "loss": 0.5305, "step": 6233, "task_loss": 1.7402743101119995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3652735948562622, "epoch": 5.27, "learning_rate": 1.2359618403574449e-05, "loss": 0.5376, "step": 6234, "task_loss": 0.7852202653884888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35535115003585815, "epoch": 5.27, "learning_rate": 1.2353580485448617e-05, "loss": 0.4457, "step": 6235, "task_loss": 0.2476380616426468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5949093699455261, "epoch": 5.27, "learning_rate": 1.2347542567322788e-05, "loss": 0.4455, "step": 6236, "task_loss": 0.11201959103345871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4688261151313782, "epoch": 5.27, "learning_rate": 1.2341504649196957e-05, "loss": 0.4586, "step": 6237, "task_loss": 0.3446193337440491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44974154233932495, "epoch": 5.27, "learning_rate": 1.2335466731071127e-05, "loss": 0.4865, "step": 6238, "task_loss": 0.6242192983627319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5322498083114624, "epoch": 5.27, "learning_rate": 1.2329428812945296e-05, "loss": 0.3348, "step": 6239, "task_loss": 0.3575191795825958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7311123609542847, "epoch": 5.27, "learning_rate": 1.2323390894819467e-05, "loss": 0.5272, "step": 6240, "task_loss": 0.297283411026001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5120882987976074, "epoch": 5.28, "learning_rate": 1.2317352976693638e-05, "loss": 0.4282, "step": 6241, "task_loss": 1.5276340246200562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6023414134979248, "epoch": 5.28, "learning_rate": 1.2311315058567806e-05, "loss": 0.4943, "step": 6242, "task_loss": 0.6430397033691406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32146820425987244, "epoch": 5.28, "learning_rate": 1.2305277140441975e-05, "loss": 0.4142, "step": 6243, "task_loss": 1.1973849534988403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2361760437488556, "epoch": 5.28, "learning_rate": 1.2299239222316146e-05, "loss": 0.3907, "step": 6244, "task_loss": 0.5391025543212891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4394904375076294, "epoch": 5.28, "learning_rate": 1.2293201304190316e-05, "loss": 0.4955, "step": 6245, "task_loss": 0.9576830863952637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5332996249198914, "epoch": 5.28, "learning_rate": 1.2287163386064485e-05, "loss": 0.4263, "step": 6246, "task_loss": 0.47600847482681274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5218952894210815, "epoch": 5.28, "learning_rate": 1.2281125467938654e-05, "loss": 0.4282, "step": 6247, "task_loss": 0.42939120531082153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3090490400791168, "epoch": 5.28, "learning_rate": 1.2275087549812825e-05, "loss": 0.4182, "step": 6248, "task_loss": 0.4032343029975891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5421972870826721, "epoch": 5.28, "learning_rate": 1.2269049631686995e-05, "loss": 0.4019, "step": 6249, "task_loss": 1.0163698196411133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5863146781921387, "epoch": 5.28, "learning_rate": 1.2263011713561164e-05, "loss": 0.4033, "step": 6250, "task_loss": 1.1737909317016602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.18500067293643951, "epoch": 5.28, "learning_rate": 1.2256973795435335e-05, "loss": 0.529, "step": 6251, "task_loss": 1.091176986694336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44266489148139954, "epoch": 5.28, "learning_rate": 1.2250935877309504e-05, "loss": 0.4928, "step": 6252, "task_loss": 0.6189447641372681 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3941328227519989, "epoch": 5.29, "learning_rate": 1.2244897959183674e-05, "loss": 0.4469, "step": 6253, "task_loss": 0.41837334632873535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4661310911178589, "epoch": 5.29, "learning_rate": 1.2238860041057843e-05, "loss": 0.4981, "step": 6254, "task_loss": 0.5334681272506714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46993929147720337, "epoch": 5.29, "learning_rate": 1.2232822122932014e-05, "loss": 0.4511, "step": 6255, "task_loss": 0.5718578696250916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6415864825248718, "epoch": 5.29, "learning_rate": 1.2226784204806184e-05, "loss": 0.5874, "step": 6256, "task_loss": 1.166537880897522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5136687755584717, "epoch": 5.29, "learning_rate": 1.2220746286680353e-05, "loss": 0.4656, "step": 6257, "task_loss": 0.42546728253364563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2938166856765747, "epoch": 5.29, "learning_rate": 1.2214708368554522e-05, "loss": 0.5055, "step": 6258, "task_loss": 0.4821000397205353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5102195143699646, "epoch": 5.29, "learning_rate": 1.2208670450428693e-05, "loss": 0.4723, "step": 6259, "task_loss": 0.3282877206802368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30259495973587036, "epoch": 5.29, "learning_rate": 1.2202632532302863e-05, "loss": 0.5012, "step": 6260, "task_loss": 1.1107146739959717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41291919350624084, "epoch": 5.29, "learning_rate": 1.2196594614177034e-05, "loss": 0.4243, "step": 6261, "task_loss": 0.42582619190216064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4471568167209625, "epoch": 5.29, "learning_rate": 1.2190556696051201e-05, "loss": 0.3573, "step": 6262, "task_loss": 0.36311691999435425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.309916615486145, "epoch": 5.29, "learning_rate": 1.2184518777925371e-05, "loss": 0.478, "step": 6263, "task_loss": 0.06853463500738144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3928259611129761, "epoch": 5.29, "learning_rate": 1.2178480859799542e-05, "loss": 0.4288, "step": 6264, "task_loss": 0.7260500192642212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4291144907474518, "epoch": 5.3, "learning_rate": 1.2172442941673713e-05, "loss": 0.4722, "step": 6265, "task_loss": 0.44616806507110596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44337618350982666, "epoch": 5.3, "learning_rate": 1.2166405023547881e-05, "loss": 0.431, "step": 6266, "task_loss": 0.6352015733718872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2501221001148224, "epoch": 5.3, "learning_rate": 1.216036710542205e-05, "loss": 0.3787, "step": 6267, "task_loss": 0.17508773505687714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37083515524864197, "epoch": 5.3, "learning_rate": 1.2154329187296221e-05, "loss": 0.4316, "step": 6268, "task_loss": 0.5260753631591797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37162959575653076, "epoch": 5.3, "learning_rate": 1.2148291269170391e-05, "loss": 0.3987, "step": 6269, "task_loss": 0.458967000246048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2888850271701813, "epoch": 5.3, "learning_rate": 1.214225335104456e-05, "loss": 0.6822, "step": 6270, "task_loss": 0.034992583096027374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3902198374271393, "epoch": 5.3, "learning_rate": 1.2136215432918731e-05, "loss": 0.4673, "step": 6271, "task_loss": 0.20604254305362701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4453846514225006, "epoch": 5.3, "learning_rate": 1.21301775147929e-05, "loss": 0.5365, "step": 6272, "task_loss": 0.8998722434043884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.358317106962204, "epoch": 5.3, "learning_rate": 1.212413959666707e-05, "loss": 0.6121, "step": 6273, "task_loss": 0.7969244718551636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2586548626422882, "epoch": 5.3, "learning_rate": 1.211810167854124e-05, "loss": 0.3253, "step": 6274, "task_loss": 0.20838871598243713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2985243797302246, "epoch": 5.3, "learning_rate": 1.211206376041541e-05, "loss": 0.3625, "step": 6275, "task_loss": 0.08204582333564758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3770885765552521, "epoch": 5.3, "learning_rate": 1.210602584228958e-05, "loss": 0.4141, "step": 6276, "task_loss": 0.6199331283569336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.556286096572876, "epoch": 5.31, "learning_rate": 1.209998792416375e-05, "loss": 0.4459, "step": 6277, "task_loss": 1.3019596338272095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6244060397148132, "epoch": 5.31, "learning_rate": 1.2093950006037918e-05, "loss": 0.4963, "step": 6278, "task_loss": 0.35750284790992737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3581559360027313, "epoch": 5.31, "learning_rate": 1.2087912087912089e-05, "loss": 0.3596, "step": 6279, "task_loss": 0.23802399635314941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8221668004989624, "epoch": 5.31, "learning_rate": 1.208187416978626e-05, "loss": 0.6341, "step": 6280, "task_loss": 1.0204393863677979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24638521671295166, "epoch": 5.31, "learning_rate": 1.2075836251660428e-05, "loss": 0.3426, "step": 6281, "task_loss": 0.553345263004303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3210230767726898, "epoch": 5.31, "learning_rate": 1.2069798333534597e-05, "loss": 0.4162, "step": 6282, "task_loss": 0.5214501619338989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5407940745353699, "epoch": 5.31, "learning_rate": 1.2063760415408768e-05, "loss": 0.4556, "step": 6283, "task_loss": 0.727148175239563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5306996703147888, "epoch": 5.31, "learning_rate": 1.2057722497282938e-05, "loss": 0.5674, "step": 6284, "task_loss": 0.9735710024833679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6272885799407959, "epoch": 5.31, "learning_rate": 1.2051684579157107e-05, "loss": 0.4548, "step": 6285, "task_loss": 0.955990731716156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4695785641670227, "epoch": 5.31, "learning_rate": 1.2045646661031278e-05, "loss": 0.4691, "step": 6286, "task_loss": 0.44260072708129883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38654008507728577, "epoch": 5.31, "learning_rate": 1.2039608742905447e-05, "loss": 0.3902, "step": 6287, "task_loss": 0.19603301584720612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6396925449371338, "epoch": 5.32, "learning_rate": 1.2033570824779617e-05, "loss": 0.4629, "step": 6288, "task_loss": 1.0044513940811157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3387939929962158, "epoch": 5.32, "learning_rate": 1.2027532906653786e-05, "loss": 0.383, "step": 6289, "task_loss": 0.30343323945999146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7601823210716248, "epoch": 5.32, "learning_rate": 1.2021494988527957e-05, "loss": 0.5635, "step": 6290, "task_loss": 0.3581352233886719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33006906509399414, "epoch": 5.32, "learning_rate": 1.2015457070402127e-05, "loss": 0.4231, "step": 6291, "task_loss": 0.6971384882926941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42513829469680786, "epoch": 5.32, "learning_rate": 1.2009419152276296e-05, "loss": 0.3747, "step": 6292, "task_loss": 0.7543697953224182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8252288699150085, "epoch": 5.32, "learning_rate": 1.2003381234150465e-05, "loss": 0.3949, "step": 6293, "task_loss": 0.9222071766853333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.610644519329071, "epoch": 5.32, "learning_rate": 1.1997343316024635e-05, "loss": 0.4701, "step": 6294, "task_loss": 0.4978223145008087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7126344442367554, "epoch": 5.32, "learning_rate": 1.1991305397898806e-05, "loss": 0.5721, "step": 6295, "task_loss": 0.9451578259468079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3959302306175232, "epoch": 5.32, "learning_rate": 1.1985267479772975e-05, "loss": 0.4084, "step": 6296, "task_loss": 0.5355932712554932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4511442184448242, "epoch": 5.32, "learning_rate": 1.1979229561647144e-05, "loss": 0.4798, "step": 6297, "task_loss": 1.2487976551055908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40686511993408203, "epoch": 5.32, "learning_rate": 1.1973191643521314e-05, "loss": 0.3995, "step": 6298, "task_loss": 1.0678472518920898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6790695190429688, "epoch": 5.32, "learning_rate": 1.1967153725395485e-05, "loss": 0.4439, "step": 6299, "task_loss": 0.9739238619804382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5380898118019104, "epoch": 5.33, "learning_rate": 1.1961115807269654e-05, "loss": 0.5202, "step": 6300, "task_loss": 0.860866129398346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5985127091407776, "epoch": 5.33, "learning_rate": 1.1955077889143823e-05, "loss": 0.5638, "step": 6301, "task_loss": 1.0473815202713013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44799453020095825, "epoch": 5.33, "learning_rate": 1.1949039971017993e-05, "loss": 0.4479, "step": 6302, "task_loss": 0.5221502184867859 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7269673943519592, "epoch": 5.33, "learning_rate": 1.1943002052892164e-05, "loss": 0.5037, "step": 6303, "task_loss": 1.4101895093917847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23132182657718658, "epoch": 5.33, "learning_rate": 1.1936964134766333e-05, "loss": 0.4106, "step": 6304, "task_loss": 1.0466407537460327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6625843048095703, "epoch": 5.33, "learning_rate": 1.1930926216640503e-05, "loss": 0.5084, "step": 6305, "task_loss": 0.25994470715522766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3752896785736084, "epoch": 5.33, "learning_rate": 1.1924888298514672e-05, "loss": 0.5054, "step": 6306, "task_loss": 0.49911820888519287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28908780217170715, "epoch": 5.33, "learning_rate": 1.1918850380388843e-05, "loss": 0.4871, "step": 6307, "task_loss": 0.45031410455703735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3758608102798462, "epoch": 5.33, "learning_rate": 1.1912812462263012e-05, "loss": 0.4788, "step": 6308, "task_loss": 0.5773899555206299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4675436019897461, "epoch": 5.33, "learning_rate": 1.1906774544137182e-05, "loss": 0.4884, "step": 6309, "task_loss": 1.1176221370697021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2508590519428253, "epoch": 5.33, "learning_rate": 1.1900736626011353e-05, "loss": 0.3087, "step": 6310, "task_loss": 0.05610812082886696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40795424580574036, "epoch": 5.33, "learning_rate": 1.1894698707885522e-05, "loss": 0.3986, "step": 6311, "task_loss": 0.3809436857700348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3572429418563843, "epoch": 5.34, "learning_rate": 1.188866078975969e-05, "loss": 0.4171, "step": 6312, "task_loss": 0.39259326457977295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8064528703689575, "epoch": 5.34, "learning_rate": 1.1882622871633861e-05, "loss": 0.4943, "step": 6313, "task_loss": 0.9118799567222595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43614137172698975, "epoch": 5.34, "learning_rate": 1.1876584953508032e-05, "loss": 0.419, "step": 6314, "task_loss": 0.22691699862480164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26580676436424255, "epoch": 5.34, "learning_rate": 1.18705470353822e-05, "loss": 0.3712, "step": 6315, "task_loss": 0.8999698162078857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32874739170074463, "epoch": 5.34, "learning_rate": 1.186450911725637e-05, "loss": 0.277, "step": 6316, "task_loss": 0.2942466735839844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39951837062835693, "epoch": 5.34, "learning_rate": 1.185847119913054e-05, "loss": 0.4682, "step": 6317, "task_loss": 0.8691373467445374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31066566705703735, "epoch": 5.34, "learning_rate": 1.185243328100471e-05, "loss": 0.3345, "step": 6318, "task_loss": 0.057947788387537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8941636085510254, "epoch": 5.34, "learning_rate": 1.184639536287888e-05, "loss": 0.565, "step": 6319, "task_loss": 1.304322361946106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4426254332065582, "epoch": 5.34, "learning_rate": 1.184035744475305e-05, "loss": 0.3892, "step": 6320, "task_loss": 1.3303678035736084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4537639915943146, "epoch": 5.34, "learning_rate": 1.1834319526627219e-05, "loss": 0.4501, "step": 6321, "task_loss": 1.2391867637634277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5315889716148376, "epoch": 5.34, "learning_rate": 1.182828160850139e-05, "loss": 0.5828, "step": 6322, "task_loss": 1.3790920972824097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1667761355638504, "epoch": 5.34, "learning_rate": 1.1822243690375558e-05, "loss": 0.341, "step": 6323, "task_loss": 0.0829068273305893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4704621732234955, "epoch": 5.35, "learning_rate": 1.1816205772249729e-05, "loss": 0.3422, "step": 6324, "task_loss": 0.30753093957901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5325760841369629, "epoch": 5.35, "learning_rate": 1.18101678541239e-05, "loss": 0.5024, "step": 6325, "task_loss": 0.7614436745643616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49799850583076477, "epoch": 5.35, "learning_rate": 1.1804129935998068e-05, "loss": 0.5632, "step": 6326, "task_loss": 0.31239959597587585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.383566677570343, "epoch": 5.35, "learning_rate": 1.1798092017872237e-05, "loss": 0.4879, "step": 6327, "task_loss": 0.514318585395813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.779148519039154, "epoch": 5.35, "learning_rate": 1.1792054099746408e-05, "loss": 0.5513, "step": 6328, "task_loss": 0.47329121828079224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5753949880599976, "epoch": 5.35, "learning_rate": 1.1786016181620578e-05, "loss": 0.5285, "step": 6329, "task_loss": 0.5735852718353271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5698537230491638, "epoch": 5.35, "learning_rate": 1.1779978263494749e-05, "loss": 0.4434, "step": 6330, "task_loss": 0.7977812886238098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4839312732219696, "epoch": 5.35, "learning_rate": 1.1773940345368916e-05, "loss": 0.5419, "step": 6331, "task_loss": 0.7991262674331665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2376115620136261, "epoch": 5.35, "learning_rate": 1.1767902427243087e-05, "loss": 0.376, "step": 6332, "task_loss": 0.6386260986328125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42174047231674194, "epoch": 5.35, "learning_rate": 1.1761864509117257e-05, "loss": 0.4343, "step": 6333, "task_loss": 0.39548665285110474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3531261086463928, "epoch": 5.35, "learning_rate": 1.1755826590991428e-05, "loss": 0.3574, "step": 6334, "task_loss": 0.19096443057060242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5499861836433411, "epoch": 5.35, "learning_rate": 1.1749788672865597e-05, "loss": 0.512, "step": 6335, "task_loss": 0.3946600556373596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38409343361854553, "epoch": 5.36, "learning_rate": 1.1743750754739766e-05, "loss": 0.3909, "step": 6336, "task_loss": 0.22466278076171875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9586614370346069, "epoch": 5.36, "learning_rate": 1.1737712836613936e-05, "loss": 0.7231, "step": 6337, "task_loss": 1.786522388458252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4369172155857086, "epoch": 5.36, "learning_rate": 1.1731674918488107e-05, "loss": 0.4958, "step": 6338, "task_loss": 0.18028295040130615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7592207193374634, "epoch": 5.36, "learning_rate": 1.1725637000362276e-05, "loss": 0.5847, "step": 6339, "task_loss": 1.1129190921783447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40526050329208374, "epoch": 5.36, "learning_rate": 1.1719599082236446e-05, "loss": 0.4632, "step": 6340, "task_loss": 0.6580080986022949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6899394989013672, "epoch": 5.36, "learning_rate": 1.1713561164110615e-05, "loss": 0.4546, "step": 6341, "task_loss": 1.111331582069397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3958185911178589, "epoch": 5.36, "learning_rate": 1.1707523245984786e-05, "loss": 0.674, "step": 6342, "task_loss": 0.5113704800605774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6063418984413147, "epoch": 5.36, "learning_rate": 1.1701485327858954e-05, "loss": 0.5251, "step": 6343, "task_loss": 0.5524447560310364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.344043105840683, "epoch": 5.36, "learning_rate": 1.1695447409733125e-05, "loss": 0.5171, "step": 6344, "task_loss": 0.6112009882926941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3540215790271759, "epoch": 5.36, "learning_rate": 1.1689409491607296e-05, "loss": 0.4928, "step": 6345, "task_loss": 0.3446291387081146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2772657573223114, "epoch": 5.36, "learning_rate": 1.1683371573481463e-05, "loss": 0.2337, "step": 6346, "task_loss": 0.4272204339504242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33394855260849, "epoch": 5.36, "learning_rate": 1.1677333655355633e-05, "loss": 0.4259, "step": 6347, "task_loss": 1.058383822441101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23304812610149384, "epoch": 5.37, "learning_rate": 1.1671295737229804e-05, "loss": 0.4209, "step": 6348, "task_loss": 0.5153782963752747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3450447916984558, "epoch": 5.37, "learning_rate": 1.1665257819103974e-05, "loss": 0.3658, "step": 6349, "task_loss": 0.3279207646846771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7239758968353271, "epoch": 5.37, "learning_rate": 1.1659219900978143e-05, "loss": 0.5568, "step": 6350, "task_loss": 1.4685956239700317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4245326817035675, "epoch": 5.37, "learning_rate": 1.1653181982852312e-05, "loss": 0.4062, "step": 6351, "task_loss": 0.5888448357582092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3318585157394409, "epoch": 5.37, "learning_rate": 1.1647144064726483e-05, "loss": 0.3297, "step": 6352, "task_loss": 0.19040919840335846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.489673376083374, "epoch": 5.37, "learning_rate": 1.1641106146600653e-05, "loss": 0.4339, "step": 6353, "task_loss": 1.7305190563201904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24561791121959686, "epoch": 5.37, "learning_rate": 1.1635068228474822e-05, "loss": 0.4788, "step": 6354, "task_loss": 0.8127731084823608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6332089900970459, "epoch": 5.37, "learning_rate": 1.1629030310348993e-05, "loss": 0.5449, "step": 6355, "task_loss": 0.5302404165267944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6534754037857056, "epoch": 5.37, "learning_rate": 1.1622992392223162e-05, "loss": 0.5078, "step": 6356, "task_loss": 0.6126828193664551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40321585536003113, "epoch": 5.37, "learning_rate": 1.1616954474097332e-05, "loss": 0.3717, "step": 6357, "task_loss": 0.22689639031887054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32186442613601685, "epoch": 5.37, "learning_rate": 1.1610916555971501e-05, "loss": 0.3777, "step": 6358, "task_loss": 0.4278821349143982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5879184007644653, "epoch": 5.38, "learning_rate": 1.1604878637845672e-05, "loss": 0.5787, "step": 6359, "task_loss": 0.6254206299781799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5147631168365479, "epoch": 5.38, "learning_rate": 1.1598840719719842e-05, "loss": 0.5361, "step": 6360, "task_loss": 0.8641071915626526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3364253640174866, "epoch": 5.38, "learning_rate": 1.1592802801594011e-05, "loss": 0.4698, "step": 6361, "task_loss": 0.44530194997787476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38226544857025146, "epoch": 5.38, "learning_rate": 1.158676488346818e-05, "loss": 0.4317, "step": 6362, "task_loss": 0.7274138927459717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3393498659133911, "epoch": 5.38, "learning_rate": 1.158072696534235e-05, "loss": 0.5512, "step": 6363, "task_loss": 0.6877003312110901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4102311432361603, "epoch": 5.38, "learning_rate": 1.1574689047216521e-05, "loss": 0.4554, "step": 6364, "task_loss": 0.23337914049625397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3142209053039551, "epoch": 5.38, "learning_rate": 1.156865112909069e-05, "loss": 0.4394, "step": 6365, "task_loss": 0.10010781139135361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5791646838188171, "epoch": 5.38, "learning_rate": 1.1562613210964859e-05, "loss": 0.4586, "step": 6366, "task_loss": 0.16384665668010712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6964355707168579, "epoch": 5.38, "learning_rate": 1.155657529283903e-05, "loss": 0.5514, "step": 6367, "task_loss": 0.7484047412872314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.328196257352829, "epoch": 5.38, "learning_rate": 1.15505373747132e-05, "loss": 0.44, "step": 6368, "task_loss": 0.5148732662200928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49885568022727966, "epoch": 5.38, "learning_rate": 1.1544499456587369e-05, "loss": 0.4583, "step": 6369, "task_loss": 0.20545554161071777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2769629955291748, "epoch": 5.38, "learning_rate": 1.153846153846154e-05, "loss": 0.4269, "step": 6370, "task_loss": 0.3381071090698242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5010702013969421, "epoch": 5.39, "learning_rate": 1.1532423620335708e-05, "loss": 0.5281, "step": 6371, "task_loss": 0.8603624105453491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2136225402355194, "epoch": 5.39, "learning_rate": 1.1526385702209879e-05, "loss": 0.4241, "step": 6372, "task_loss": 0.34543347358703613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42111849784851074, "epoch": 5.39, "learning_rate": 1.1520347784084048e-05, "loss": 0.4015, "step": 6373, "task_loss": 0.45545148849487305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7068221569061279, "epoch": 5.39, "learning_rate": 1.1514309865958218e-05, "loss": 0.6417, "step": 6374, "task_loss": 0.7508832216262817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36807018518447876, "epoch": 5.39, "learning_rate": 1.1508271947832389e-05, "loss": 0.3598, "step": 6375, "task_loss": 0.4108993113040924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28906649351119995, "epoch": 5.39, "learning_rate": 1.1502234029706558e-05, "loss": 0.3821, "step": 6376, "task_loss": 0.5109365582466125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41303277015686035, "epoch": 5.39, "learning_rate": 1.1496196111580727e-05, "loss": 0.3843, "step": 6377, "task_loss": 0.6203212738037109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32407620549201965, "epoch": 5.39, "learning_rate": 1.1490158193454897e-05, "loss": 0.4541, "step": 6378, "task_loss": 0.3936872184276581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6017963290214539, "epoch": 5.39, "learning_rate": 1.1484120275329068e-05, "loss": 0.471, "step": 6379, "task_loss": 0.6045811176300049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41324788331985474, "epoch": 5.39, "learning_rate": 1.1478082357203237e-05, "loss": 0.4025, "step": 6380, "task_loss": 1.426512360572815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39666372537612915, "epoch": 5.39, "learning_rate": 1.1472044439077406e-05, "loss": 0.4812, "step": 6381, "task_loss": 0.386556476354599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40040332078933716, "epoch": 5.39, "learning_rate": 1.1466006520951576e-05, "loss": 0.3474, "step": 6382, "task_loss": 0.3610733449459076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42494675517082214, "epoch": 5.4, "learning_rate": 1.1459968602825747e-05, "loss": 0.4523, "step": 6383, "task_loss": 0.47069215774536133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5623818039894104, "epoch": 5.4, "learning_rate": 1.1453930684699916e-05, "loss": 0.4397, "step": 6384, "task_loss": 0.7198392152786255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5985987782478333, "epoch": 5.4, "learning_rate": 1.1447892766574086e-05, "loss": 0.4971, "step": 6385, "task_loss": 0.786270260810852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2824723422527313, "epoch": 5.4, "learning_rate": 1.1441854848448255e-05, "loss": 0.4207, "step": 6386, "task_loss": 0.0542759969830513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33957523107528687, "epoch": 5.4, "learning_rate": 1.1435816930322426e-05, "loss": 0.4801, "step": 6387, "task_loss": 0.5397275686264038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6089708805084229, "epoch": 5.4, "learning_rate": 1.1429779012196595e-05, "loss": 0.5876, "step": 6388, "task_loss": 0.6468815803527832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5920357704162598, "epoch": 5.4, "learning_rate": 1.1423741094070765e-05, "loss": 0.5292, "step": 6389, "task_loss": 0.25523439049720764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42629921436309814, "epoch": 5.4, "learning_rate": 1.1417703175944934e-05, "loss": 0.4412, "step": 6390, "task_loss": 1.1363967657089233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4772029519081116, "epoch": 5.4, "learning_rate": 1.1411665257819105e-05, "loss": 0.4764, "step": 6391, "task_loss": 0.7943968772888184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.439319372177124, "epoch": 5.4, "learning_rate": 1.1405627339693273e-05, "loss": 0.4985, "step": 6392, "task_loss": 1.0646936893463135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4251845180988312, "epoch": 5.4, "learning_rate": 1.1399589421567444e-05, "loss": 0.5704, "step": 6393, "task_loss": 0.3912878930568695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36972397565841675, "epoch": 5.4, "learning_rate": 1.1393551503441615e-05, "loss": 0.488, "step": 6394, "task_loss": 0.46971622109413147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45394986867904663, "epoch": 5.41, "learning_rate": 1.1387513585315783e-05, "loss": 0.381, "step": 6395, "task_loss": 0.629061758518219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48511433601379395, "epoch": 5.41, "learning_rate": 1.1381475667189952e-05, "loss": 0.4789, "step": 6396, "task_loss": 1.0026973485946655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27677080035209656, "epoch": 5.41, "learning_rate": 1.1375437749064123e-05, "loss": 0.4603, "step": 6397, "task_loss": 0.7759247422218323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44869422912597656, "epoch": 5.41, "learning_rate": 1.1369399830938294e-05, "loss": 0.3764, "step": 6398, "task_loss": 0.14486223459243774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4511607885360718, "epoch": 5.41, "learning_rate": 1.1363361912812464e-05, "loss": 0.4461, "step": 6399, "task_loss": 0.7577351331710815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39995741844177246, "epoch": 5.41, "learning_rate": 1.1357323994686631e-05, "loss": 0.4549, "step": 6400, "task_loss": 0.6534932851791382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3420477509498596, "epoch": 5.41, "learning_rate": 1.1351286076560802e-05, "loss": 0.4871, "step": 6401, "task_loss": 0.5055237412452698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3617399334907532, "epoch": 5.41, "learning_rate": 1.1345248158434972e-05, "loss": 0.5097, "step": 6402, "task_loss": 0.420353502035141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39528170228004456, "epoch": 5.41, "learning_rate": 1.1339210240309143e-05, "loss": 0.5112, "step": 6403, "task_loss": 0.7756186127662659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45848339796066284, "epoch": 5.41, "learning_rate": 1.1333172322183312e-05, "loss": 0.4251, "step": 6404, "task_loss": 0.7465702295303345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4680298864841461, "epoch": 5.41, "learning_rate": 1.132713440405748e-05, "loss": 0.4385, "step": 6405, "task_loss": 0.24607042968273163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36823293566703796, "epoch": 5.41, "learning_rate": 1.1321096485931651e-05, "loss": 0.5135, "step": 6406, "task_loss": 0.603877067565918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5664137005805969, "epoch": 5.42, "learning_rate": 1.131505856780582e-05, "loss": 0.6184, "step": 6407, "task_loss": 0.7587255239486694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5582165718078613, "epoch": 5.42, "learning_rate": 1.130902064967999e-05, "loss": 0.3506, "step": 6408, "task_loss": 0.443971186876297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3768201470375061, "epoch": 5.42, "learning_rate": 1.1302982731554161e-05, "loss": 0.3418, "step": 6409, "task_loss": 0.04182751476764679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.529041051864624, "epoch": 5.42, "learning_rate": 1.129694481342833e-05, "loss": 0.4336, "step": 6410, "task_loss": 1.1429561376571655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6748365163803101, "epoch": 5.42, "learning_rate": 1.1290906895302499e-05, "loss": 0.5418, "step": 6411, "task_loss": 0.8384679555892944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7615485191345215, "epoch": 5.42, "learning_rate": 1.128486897717667e-05, "loss": 0.611, "step": 6412, "task_loss": 1.0083147287368774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49328261613845825, "epoch": 5.42, "learning_rate": 1.127883105905084e-05, "loss": 0.396, "step": 6413, "task_loss": 0.8322123289108276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6751197576522827, "epoch": 5.42, "learning_rate": 1.127279314092501e-05, "loss": 0.4995, "step": 6414, "task_loss": 1.2465537786483765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42600181698799133, "epoch": 5.42, "learning_rate": 1.1266755222799178e-05, "loss": 0.49, "step": 6415, "task_loss": 0.34879347681999207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45959609746932983, "epoch": 5.42, "learning_rate": 1.1260717304673349e-05, "loss": 0.5502, "step": 6416, "task_loss": 1.971940279006958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4291689991950989, "epoch": 5.42, "learning_rate": 1.1254679386547519e-05, "loss": 0.5159, "step": 6417, "task_loss": 0.9801028370857239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4734179973602295, "epoch": 5.42, "learning_rate": 1.124864146842169e-05, "loss": 0.3783, "step": 6418, "task_loss": 0.4718081057071686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3216676414012909, "epoch": 5.43, "learning_rate": 1.1242603550295859e-05, "loss": 0.3321, "step": 6419, "task_loss": 0.44735509157180786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32978951930999756, "epoch": 5.43, "learning_rate": 1.1236565632170027e-05, "loss": 0.4263, "step": 6420, "task_loss": 0.12534235417842865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35204386711120605, "epoch": 5.43, "learning_rate": 1.1230527714044198e-05, "loss": 0.4164, "step": 6421, "task_loss": 0.8288156986236572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6068124175071716, "epoch": 5.43, "learning_rate": 1.1224489795918369e-05, "loss": 0.4998, "step": 6422, "task_loss": 0.31855127215385437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4088638126850128, "epoch": 5.43, "learning_rate": 1.1218451877792537e-05, "loss": 0.6106, "step": 6423, "task_loss": 0.5528804659843445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3809813857078552, "epoch": 5.43, "learning_rate": 1.1212413959666708e-05, "loss": 0.4612, "step": 6424, "task_loss": 1.1572620868682861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41128963232040405, "epoch": 5.43, "learning_rate": 1.1206376041540877e-05, "loss": 0.4602, "step": 6425, "task_loss": 0.25753000378608704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35183602571487427, "epoch": 5.43, "learning_rate": 1.1200338123415047e-05, "loss": 0.5187, "step": 6426, "task_loss": 0.5737191438674927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6274593472480774, "epoch": 5.43, "learning_rate": 1.1194300205289216e-05, "loss": 0.579, "step": 6427, "task_loss": 0.6048048734664917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34852197766304016, "epoch": 5.43, "learning_rate": 1.1188262287163387e-05, "loss": 0.4036, "step": 6428, "task_loss": 0.18281786143779755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.648535966873169, "epoch": 5.43, "learning_rate": 1.1182224369037557e-05, "loss": 0.5554, "step": 6429, "task_loss": 0.40111470222473145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49604859948158264, "epoch": 5.44, "learning_rate": 1.1176186450911726e-05, "loss": 0.4556, "step": 6430, "task_loss": 0.2758389413356781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.320651650428772, "epoch": 5.44, "learning_rate": 1.1170148532785895e-05, "loss": 0.4658, "step": 6431, "task_loss": 0.13978277146816254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2970947325229645, "epoch": 5.44, "learning_rate": 1.1164110614660066e-05, "loss": 0.3977, "step": 6432, "task_loss": 0.14437319338321686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38155215978622437, "epoch": 5.44, "learning_rate": 1.1158072696534236e-05, "loss": 0.4388, "step": 6433, "task_loss": 0.4137100875377655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35221874713897705, "epoch": 5.44, "learning_rate": 1.1152034778408405e-05, "loss": 0.4908, "step": 6434, "task_loss": 0.7819401621818542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5120300054550171, "epoch": 5.44, "learning_rate": 1.1145996860282574e-05, "loss": 0.4366, "step": 6435, "task_loss": 0.8740734457969666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3415496349334717, "epoch": 5.44, "learning_rate": 1.1139958942156745e-05, "loss": 0.3521, "step": 6436, "task_loss": 0.7334772348403931 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37128493189811707, "epoch": 5.44, "learning_rate": 1.1133921024030915e-05, "loss": 0.4353, "step": 6437, "task_loss": 0.7957016229629517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1981428563594818, "epoch": 5.44, "learning_rate": 1.1127883105905084e-05, "loss": 0.3754, "step": 6438, "task_loss": 0.053139738738536835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4762312173843384, "epoch": 5.44, "learning_rate": 1.1121845187779255e-05, "loss": 0.4237, "step": 6439, "task_loss": 0.0740610733628273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7873530387878418, "epoch": 5.44, "learning_rate": 1.1115807269653424e-05, "loss": 0.5736, "step": 6440, "task_loss": 0.8475521206855774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40703994035720825, "epoch": 5.44, "learning_rate": 1.1109769351527594e-05, "loss": 0.4414, "step": 6441, "task_loss": 1.3802993297576904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.345816969871521, "epoch": 5.45, "learning_rate": 1.1103731433401763e-05, "loss": 0.3743, "step": 6442, "task_loss": 0.8764696717262268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7445118427276611, "epoch": 5.45, "learning_rate": 1.1097693515275934e-05, "loss": 0.5509, "step": 6443, "task_loss": 0.37660279870033264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5909204483032227, "epoch": 5.45, "learning_rate": 1.1091655597150104e-05, "loss": 0.4894, "step": 6444, "task_loss": 1.2657946348190308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3718975782394409, "epoch": 5.45, "learning_rate": 1.1085617679024273e-05, "loss": 0.4873, "step": 6445, "task_loss": 0.501110315322876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33142584562301636, "epoch": 5.45, "learning_rate": 1.1079579760898442e-05, "loss": 0.559, "step": 6446, "task_loss": 0.7191939949989319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.611814022064209, "epoch": 5.45, "learning_rate": 1.1073541842772613e-05, "loss": 0.578, "step": 6447, "task_loss": 0.268498957157135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3986048400402069, "epoch": 5.45, "learning_rate": 1.1067503924646783e-05, "loss": 0.5072, "step": 6448, "task_loss": 0.48831111192703247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5388194918632507, "epoch": 5.45, "learning_rate": 1.1061466006520952e-05, "loss": 0.5412, "step": 6449, "task_loss": 0.5628845691680908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 1.0428508520126343, "epoch": 5.45, "learning_rate": 1.1055428088395121e-05, "loss": 0.5683, "step": 6450, "task_loss": 1.226529598236084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4304138422012329, "epoch": 5.45, "learning_rate": 1.1049390170269291e-05, "loss": 0.5456, "step": 6451, "task_loss": 0.5530934929847717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2857217788696289, "epoch": 5.45, "learning_rate": 1.1043352252143462e-05, "loss": 0.341, "step": 6452, "task_loss": 0.3938648998737335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27217262983322144, "epoch": 5.45, "learning_rate": 1.1037314334017631e-05, "loss": 0.3429, "step": 6453, "task_loss": 0.8099424839019775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6063569784164429, "epoch": 5.46, "learning_rate": 1.1031276415891801e-05, "loss": 0.5162, "step": 6454, "task_loss": 0.9602629542350769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4714534878730774, "epoch": 5.46, "learning_rate": 1.102523849776597e-05, "loss": 0.5237, "step": 6455, "task_loss": 0.34096571803092957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48314720392227173, "epoch": 5.46, "learning_rate": 1.1019200579640141e-05, "loss": 0.5024, "step": 6456, "task_loss": 0.4365038275718689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2683447003364563, "epoch": 5.46, "learning_rate": 1.101316266151431e-05, "loss": 0.4358, "step": 6457, "task_loss": 0.5945911407470703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8255869150161743, "epoch": 5.46, "learning_rate": 1.100712474338848e-05, "loss": 0.4642, "step": 6458, "task_loss": 0.9104630351066589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40880635380744934, "epoch": 5.46, "learning_rate": 1.1001086825262651e-05, "loss": 0.4075, "step": 6459, "task_loss": 0.377916544675827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43631649017333984, "epoch": 5.46, "learning_rate": 1.099504890713682e-05, "loss": 0.4785, "step": 6460, "task_loss": 0.5686953663825989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3980233371257782, "epoch": 5.46, "learning_rate": 1.0989010989010989e-05, "loss": 0.4119, "step": 6461, "task_loss": 1.5459638833999634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38641515374183655, "epoch": 5.46, "learning_rate": 1.098297307088516e-05, "loss": 0.4423, "step": 6462, "task_loss": 0.33670857548713684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.327347069978714, "epoch": 5.46, "learning_rate": 1.097693515275933e-05, "loss": 0.4952, "step": 6463, "task_loss": 0.5231347680091858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.621661365032196, "epoch": 5.46, "learning_rate": 1.09708972346335e-05, "loss": 0.4326, "step": 6464, "task_loss": 1.2433968782424927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2716473639011383, "epoch": 5.46, "learning_rate": 1.0964859316507668e-05, "loss": 0.4567, "step": 6465, "task_loss": 1.1938037872314453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31608444452285767, "epoch": 5.47, "learning_rate": 1.0958821398381838e-05, "loss": 0.4297, "step": 6466, "task_loss": 0.24395181238651276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39739203453063965, "epoch": 5.47, "learning_rate": 1.0952783480256009e-05, "loss": 0.5702, "step": 6467, "task_loss": 1.445815086364746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31090232729911804, "epoch": 5.47, "learning_rate": 1.094674556213018e-05, "loss": 0.4071, "step": 6468, "task_loss": 0.2702001929283142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9702149629592896, "epoch": 5.47, "learning_rate": 1.0940707644004348e-05, "loss": 0.6727, "step": 6469, "task_loss": 1.1243218183517456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25207996368408203, "epoch": 5.47, "learning_rate": 1.0934669725878517e-05, "loss": 0.3598, "step": 6470, "task_loss": 0.2202354520559311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7336540818214417, "epoch": 5.47, "learning_rate": 1.0928631807752688e-05, "loss": 0.5804, "step": 6471, "task_loss": 1.0822430849075317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3480544984340668, "epoch": 5.47, "learning_rate": 1.0922593889626856e-05, "loss": 0.5099, "step": 6472, "task_loss": 0.34024932980537415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4002733826637268, "epoch": 5.47, "learning_rate": 1.0916555971501027e-05, "loss": 0.3996, "step": 6473, "task_loss": 1.1772016286849976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33383792638778687, "epoch": 5.47, "learning_rate": 1.0910518053375198e-05, "loss": 0.4986, "step": 6474, "task_loss": 0.91908860206604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4257444143295288, "epoch": 5.47, "learning_rate": 1.0904480135249366e-05, "loss": 0.4201, "step": 6475, "task_loss": 0.7394271492958069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3046905994415283, "epoch": 5.47, "learning_rate": 1.0898442217123535e-05, "loss": 0.4473, "step": 6476, "task_loss": 0.6722404360771179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5753474235534668, "epoch": 5.47, "learning_rate": 1.0892404298997706e-05, "loss": 0.5838, "step": 6477, "task_loss": 0.8624971508979797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5037566423416138, "epoch": 5.48, "learning_rate": 1.0886366380871877e-05, "loss": 0.5519, "step": 6478, "task_loss": 1.0764614343643188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.333042174577713, "epoch": 5.48, "learning_rate": 1.0880328462746045e-05, "loss": 0.3867, "step": 6479, "task_loss": 0.2359268218278885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30141475796699524, "epoch": 5.48, "learning_rate": 1.0874290544620214e-05, "loss": 0.3965, "step": 6480, "task_loss": 0.25370681285858154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3596739172935486, "epoch": 5.48, "learning_rate": 1.0868252626494385e-05, "loss": 0.3962, "step": 6481, "task_loss": 0.49250516295433044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2981621026992798, "epoch": 5.48, "learning_rate": 1.0862214708368555e-05, "loss": 0.4751, "step": 6482, "task_loss": 0.5364390015602112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40946996212005615, "epoch": 5.48, "learning_rate": 1.0856176790242726e-05, "loss": 0.5556, "step": 6483, "task_loss": 1.1326842308044434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3737828731536865, "epoch": 5.48, "learning_rate": 1.0850138872116893e-05, "loss": 0.4095, "step": 6484, "task_loss": 0.8820573091506958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36158227920532227, "epoch": 5.48, "learning_rate": 1.0844100953991064e-05, "loss": 0.3872, "step": 6485, "task_loss": 0.9013544321060181 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38583821058273315, "epoch": 5.48, "learning_rate": 1.0838063035865234e-05, "loss": 0.5921, "step": 6486, "task_loss": 0.27379173040390015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48716580867767334, "epoch": 5.48, "learning_rate": 1.0832025117739405e-05, "loss": 0.455, "step": 6487, "task_loss": 0.6701850891113281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3052396774291992, "epoch": 5.48, "learning_rate": 1.0825987199613574e-05, "loss": 0.4788, "step": 6488, "task_loss": 0.0638214722275734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5256896018981934, "epoch": 5.48, "learning_rate": 1.0819949281487743e-05, "loss": 0.4606, "step": 6489, "task_loss": 1.0799148082733154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4381248354911804, "epoch": 5.49, "learning_rate": 1.0813911363361913e-05, "loss": 0.3705, "step": 6490, "task_loss": 0.5858855247497559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3847799301147461, "epoch": 5.49, "learning_rate": 1.0807873445236084e-05, "loss": 0.4466, "step": 6491, "task_loss": 1.9758387804031372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4201732277870178, "epoch": 5.49, "learning_rate": 1.0801835527110253e-05, "loss": 0.4744, "step": 6492, "task_loss": 0.5729876756668091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7057101130485535, "epoch": 5.49, "learning_rate": 1.0795797608984423e-05, "loss": 0.6407, "step": 6493, "task_loss": 0.5777044296264648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21219275891780853, "epoch": 5.49, "learning_rate": 1.0789759690858592e-05, "loss": 0.3787, "step": 6494, "task_loss": 0.7489749193191528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33288317918777466, "epoch": 5.49, "learning_rate": 1.0783721772732763e-05, "loss": 0.4137, "step": 6495, "task_loss": 0.3771737813949585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6851563453674316, "epoch": 5.49, "learning_rate": 1.0777683854606932e-05, "loss": 0.5772, "step": 6496, "task_loss": 1.367717981338501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.503210186958313, "epoch": 5.49, "learning_rate": 1.0771645936481102e-05, "loss": 0.4328, "step": 6497, "task_loss": 0.4273316264152527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32794228196144104, "epoch": 5.49, "learning_rate": 1.0765608018355273e-05, "loss": 0.4324, "step": 6498, "task_loss": 1.2285724878311157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8235206007957458, "epoch": 5.49, "learning_rate": 1.0759570100229442e-05, "loss": 0.589, "step": 6499, "task_loss": 1.5433768033981323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5095159411430359, "epoch": 5.49, "learning_rate": 1.075353218210361e-05, "loss": 0.3995, "step": 6500, "task_loss": 0.7390114068984985 }, { "epoch": 5.49, "eval_accuracy": 0.9094653465346535, "eval_loss": 0.29315459728240967, "eval_runtime": 227.6647, "eval_samples_per_second": 110.909, "eval_steps_per_second": 0.87, "step": 6500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.267596960067749, "epoch": 5.5, "learning_rate": 1.0747494263977781e-05, "loss": 0.3898, "step": 6501, "task_loss": 0.4630683958530426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4312867224216461, "epoch": 5.5, "learning_rate": 1.0741456345851952e-05, "loss": 0.3905, "step": 6502, "task_loss": 0.8280001878738403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.307012140750885, "epoch": 5.5, "learning_rate": 1.073541842772612e-05, "loss": 0.486, "step": 6503, "task_loss": 0.2967236340045929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5807651877403259, "epoch": 5.5, "learning_rate": 1.072938050960029e-05, "loss": 0.4496, "step": 6504, "task_loss": 0.767183780670166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6329793930053711, "epoch": 5.5, "learning_rate": 1.072334259147446e-05, "loss": 0.4512, "step": 6505, "task_loss": 0.6327988505363464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46675875782966614, "epoch": 5.5, "learning_rate": 1.071730467334863e-05, "loss": 0.4581, "step": 6506, "task_loss": 1.4379515647888184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3040703237056732, "epoch": 5.5, "learning_rate": 1.07112667552228e-05, "loss": 0.402, "step": 6507, "task_loss": 0.6056128740310669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41238200664520264, "epoch": 5.5, "learning_rate": 1.070522883709697e-05, "loss": 0.4144, "step": 6508, "task_loss": 0.17623336613178253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5403857231140137, "epoch": 5.5, "learning_rate": 1.0699190918971139e-05, "loss": 0.5543, "step": 6509, "task_loss": 0.4151618778705597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3762030005455017, "epoch": 5.5, "learning_rate": 1.069315300084531e-05, "loss": 0.3474, "step": 6510, "task_loss": 0.42921024560928345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3554438650608063, "epoch": 5.5, "learning_rate": 1.0687115082719478e-05, "loss": 0.3935, "step": 6511, "task_loss": 0.4663337767124176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33299189805984497, "epoch": 5.5, "learning_rate": 1.0681077164593649e-05, "loss": 0.5027, "step": 6512, "task_loss": 0.9205884337425232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4028089940547943, "epoch": 5.51, "learning_rate": 1.067503924646782e-05, "loss": 0.4574, "step": 6513, "task_loss": 0.6021021008491516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2891165614128113, "epoch": 5.51, "learning_rate": 1.0669001328341988e-05, "loss": 0.507, "step": 6514, "task_loss": 0.6659336090087891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23118877410888672, "epoch": 5.51, "learning_rate": 1.0662963410216157e-05, "loss": 0.3977, "step": 6515, "task_loss": 0.6170550584793091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3620295524597168, "epoch": 5.51, "learning_rate": 1.0656925492090328e-05, "loss": 0.3807, "step": 6516, "task_loss": 0.15695656836032867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6828116178512573, "epoch": 5.51, "learning_rate": 1.0650887573964498e-05, "loss": 0.4686, "step": 6517, "task_loss": 1.088202714920044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7250940203666687, "epoch": 5.51, "learning_rate": 1.0644849655838667e-05, "loss": 0.4492, "step": 6518, "task_loss": 0.3931708335876465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41655704379081726, "epoch": 5.51, "learning_rate": 1.0638811737712836e-05, "loss": 0.5239, "step": 6519, "task_loss": 0.5539785027503967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48181748390197754, "epoch": 5.51, "learning_rate": 1.0632773819587007e-05, "loss": 0.4251, "step": 6520, "task_loss": 0.7910305261611938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5616582632064819, "epoch": 5.51, "learning_rate": 1.0626735901461177e-05, "loss": 0.5108, "step": 6521, "task_loss": 0.5756950378417969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28778934478759766, "epoch": 5.51, "learning_rate": 1.0620697983335346e-05, "loss": 0.3888, "step": 6522, "task_loss": 0.5771900415420532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5263245105743408, "epoch": 5.51, "learning_rate": 1.0614660065209517e-05, "loss": 0.4604, "step": 6523, "task_loss": 0.7063573002815247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38132530450820923, "epoch": 5.51, "learning_rate": 1.0608622147083686e-05, "loss": 0.502, "step": 6524, "task_loss": 0.31479889154434204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5895600318908691, "epoch": 5.52, "learning_rate": 1.0602584228957856e-05, "loss": 0.5172, "step": 6525, "task_loss": 1.113694667816162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38343244791030884, "epoch": 5.52, "learning_rate": 1.0596546310832025e-05, "loss": 0.3989, "step": 6526, "task_loss": 0.4916403293609619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25377658009529114, "epoch": 5.52, "learning_rate": 1.0590508392706196e-05, "loss": 0.3731, "step": 6527, "task_loss": 0.33476370573043823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6367818713188171, "epoch": 5.52, "learning_rate": 1.0584470474580366e-05, "loss": 0.5268, "step": 6528, "task_loss": 0.7910555005073547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5747889280319214, "epoch": 5.52, "learning_rate": 1.0578432556454535e-05, "loss": 0.4819, "step": 6529, "task_loss": 0.37968146800994873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45716729760169983, "epoch": 5.52, "learning_rate": 1.0572394638328704e-05, "loss": 0.4448, "step": 6530, "task_loss": 0.7606426477432251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6061018109321594, "epoch": 5.52, "learning_rate": 1.0566356720202874e-05, "loss": 0.5286, "step": 6531, "task_loss": 0.9126724600791931 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3092736303806305, "epoch": 5.52, "learning_rate": 1.0560318802077045e-05, "loss": 0.3901, "step": 6532, "task_loss": 0.4011595845222473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46810585260391235, "epoch": 5.52, "learning_rate": 1.0554280883951216e-05, "loss": 0.4159, "step": 6533, "task_loss": 1.1186864376068115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.15495768189430237, "epoch": 5.52, "learning_rate": 1.0548242965825383e-05, "loss": 0.3195, "step": 6534, "task_loss": 0.013231070712208748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.547240674495697, "epoch": 5.52, "learning_rate": 1.0542205047699553e-05, "loss": 0.4593, "step": 6535, "task_loss": 0.8312302827835083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31567081809043884, "epoch": 5.52, "learning_rate": 1.0536167129573724e-05, "loss": 0.5383, "step": 6536, "task_loss": 0.31786632537841797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49665123224258423, "epoch": 5.53, "learning_rate": 1.0530129211447893e-05, "loss": 0.4475, "step": 6537, "task_loss": 0.45344409346580505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37035349011421204, "epoch": 5.53, "learning_rate": 1.0524091293322063e-05, "loss": 0.5358, "step": 6538, "task_loss": 0.8119237422943115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49556565284729004, "epoch": 5.53, "learning_rate": 1.0518053375196232e-05, "loss": 0.462, "step": 6539, "task_loss": 0.4232543706893921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8214705586433411, "epoch": 5.53, "learning_rate": 1.0512015457070403e-05, "loss": 0.3938, "step": 6540, "task_loss": 1.2616276741027832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7333650588989258, "epoch": 5.53, "learning_rate": 1.0505977538944572e-05, "loss": 0.5687, "step": 6541, "task_loss": 1.2309322357177734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43745866417884827, "epoch": 5.53, "learning_rate": 1.0499939620818742e-05, "loss": 0.3518, "step": 6542, "task_loss": 0.2668023705482483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4480666518211365, "epoch": 5.53, "learning_rate": 1.0493901702692913e-05, "loss": 0.4819, "step": 6543, "task_loss": 0.39329561591148376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6971666812896729, "epoch": 5.53, "learning_rate": 1.0487863784567082e-05, "loss": 0.4724, "step": 6544, "task_loss": 1.146660327911377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.296968936920166, "epoch": 5.53, "learning_rate": 1.048182586644125e-05, "loss": 0.4326, "step": 6545, "task_loss": 0.5446733832359314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3317286968231201, "epoch": 5.53, "learning_rate": 1.0475787948315421e-05, "loss": 0.4223, "step": 6546, "task_loss": 1.5151302814483643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22501042485237122, "epoch": 5.53, "learning_rate": 1.0469750030189592e-05, "loss": 0.4854, "step": 6547, "task_loss": 0.3349217176437378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4007885456085205, "epoch": 5.53, "learning_rate": 1.0463712112063762e-05, "loss": 0.3737, "step": 6548, "task_loss": 0.1894521415233612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6889320611953735, "epoch": 5.54, "learning_rate": 1.045767419393793e-05, "loss": 0.499, "step": 6549, "task_loss": 0.9466208219528198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5189695358276367, "epoch": 5.54, "learning_rate": 1.04516362758121e-05, "loss": 0.4881, "step": 6550, "task_loss": 1.6424872875213623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36308836936950684, "epoch": 5.54, "learning_rate": 1.044559835768627e-05, "loss": 0.3585, "step": 6551, "task_loss": 1.2149078845977783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24555683135986328, "epoch": 5.54, "learning_rate": 1.0439560439560441e-05, "loss": 0.3371, "step": 6552, "task_loss": 0.6002638936042786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46089375019073486, "epoch": 5.54, "learning_rate": 1.043352252143461e-05, "loss": 0.542, "step": 6553, "task_loss": 0.7457107305526733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46785590052604675, "epoch": 5.54, "learning_rate": 1.0427484603308779e-05, "loss": 0.4378, "step": 6554, "task_loss": 0.6289100050926208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5258687734603882, "epoch": 5.54, "learning_rate": 1.042144668518295e-05, "loss": 0.5046, "step": 6555, "task_loss": 1.0865079164505005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28771448135375977, "epoch": 5.54, "learning_rate": 1.041540876705712e-05, "loss": 0.4324, "step": 6556, "task_loss": 0.5617824792861938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4335520565509796, "epoch": 5.54, "learning_rate": 1.0409370848931289e-05, "loss": 0.4145, "step": 6557, "task_loss": 0.781493067741394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40867680311203003, "epoch": 5.54, "learning_rate": 1.040333293080546e-05, "loss": 0.4504, "step": 6558, "task_loss": 0.7835278511047363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34899264574050903, "epoch": 5.54, "learning_rate": 1.0397295012679628e-05, "loss": 0.3902, "step": 6559, "task_loss": 0.5776785612106323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7723253965377808, "epoch": 5.54, "learning_rate": 1.0391257094553799e-05, "loss": 0.4866, "step": 6560, "task_loss": 1.3316569328308105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.721788763999939, "epoch": 5.55, "learning_rate": 1.0385219176427968e-05, "loss": 0.466, "step": 6561, "task_loss": 0.5291551947593689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21560153365135193, "epoch": 5.55, "learning_rate": 1.0379181258302138e-05, "loss": 0.3585, "step": 6562, "task_loss": 0.06305918842554092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39289143681526184, "epoch": 5.55, "learning_rate": 1.0373143340176309e-05, "loss": 0.4994, "step": 6563, "task_loss": 0.6463732719421387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24446183443069458, "epoch": 5.55, "learning_rate": 1.0367105422050478e-05, "loss": 0.4623, "step": 6564, "task_loss": 0.12131837010383606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2689506709575653, "epoch": 5.55, "learning_rate": 1.0361067503924647e-05, "loss": 0.408, "step": 6565, "task_loss": 1.2945120334625244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5966702699661255, "epoch": 5.55, "learning_rate": 1.0355029585798817e-05, "loss": 0.5155, "step": 6566, "task_loss": 0.8464288711547852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47356027364730835, "epoch": 5.55, "learning_rate": 1.0348991667672988e-05, "loss": 0.414, "step": 6567, "task_loss": 1.1683868169784546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4869329333305359, "epoch": 5.55, "learning_rate": 1.0342953749547157e-05, "loss": 0.4917, "step": 6568, "task_loss": 0.6952275037765503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4912315011024475, "epoch": 5.55, "learning_rate": 1.0336915831421326e-05, "loss": 0.4948, "step": 6569, "task_loss": 0.23212383687496185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44060713052749634, "epoch": 5.55, "learning_rate": 1.0330877913295496e-05, "loss": 0.3936, "step": 6570, "task_loss": 1.5084562301635742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7930140495300293, "epoch": 5.55, "learning_rate": 1.0324839995169667e-05, "loss": 0.5838, "step": 6571, "task_loss": 1.8577957153320312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3639751076698303, "epoch": 5.56, "learning_rate": 1.0318802077043836e-05, "loss": 0.404, "step": 6572, "task_loss": 1.8433747291564941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5634630918502808, "epoch": 5.56, "learning_rate": 1.0312764158918005e-05, "loss": 0.5731, "step": 6573, "task_loss": 0.2750808298587799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32636380195617676, "epoch": 5.56, "learning_rate": 1.0306726240792175e-05, "loss": 0.4074, "step": 6574, "task_loss": 0.5030247569084167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5527387261390686, "epoch": 5.56, "learning_rate": 1.0300688322666346e-05, "loss": 0.5011, "step": 6575, "task_loss": 0.6462475061416626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2701635956764221, "epoch": 5.56, "learning_rate": 1.0294650404540515e-05, "loss": 0.5192, "step": 6576, "task_loss": 0.14141885936260223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3958849012851715, "epoch": 5.56, "learning_rate": 1.0288612486414685e-05, "loss": 0.3827, "step": 6577, "task_loss": 0.338423490524292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22311869263648987, "epoch": 5.56, "learning_rate": 1.0282574568288854e-05, "loss": 0.436, "step": 6578, "task_loss": 0.6539350152015686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2878463864326477, "epoch": 5.56, "learning_rate": 1.0276536650163025e-05, "loss": 0.364, "step": 6579, "task_loss": 0.768605649471283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4917476177215576, "epoch": 5.56, "learning_rate": 1.0270498732037193e-05, "loss": 0.387, "step": 6580, "task_loss": 0.326134592294693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36220911145210266, "epoch": 5.56, "learning_rate": 1.0264460813911364e-05, "loss": 0.3874, "step": 6581, "task_loss": 0.4775090515613556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7118590474128723, "epoch": 5.56, "learning_rate": 1.0258422895785535e-05, "loss": 0.5283, "step": 6582, "task_loss": 1.0879887342453003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39235302805900574, "epoch": 5.56, "learning_rate": 1.0252384977659703e-05, "loss": 0.4324, "step": 6583, "task_loss": 0.7971075773239136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2609184682369232, "epoch": 5.57, "learning_rate": 1.0246347059533872e-05, "loss": 0.36, "step": 6584, "task_loss": 0.09308464080095291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4079083800315857, "epoch": 5.57, "learning_rate": 1.0240309141408043e-05, "loss": 0.4782, "step": 6585, "task_loss": 1.465531587600708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22239530086517334, "epoch": 5.57, "learning_rate": 1.0234271223282213e-05, "loss": 0.3726, "step": 6586, "task_loss": 0.5140138864517212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2814912796020508, "epoch": 5.57, "learning_rate": 1.0228233305156382e-05, "loss": 0.3784, "step": 6587, "task_loss": 0.19766443967819214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5245600938796997, "epoch": 5.57, "learning_rate": 1.0222195387030551e-05, "loss": 0.4622, "step": 6588, "task_loss": 0.8332313299179077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5765300989151001, "epoch": 5.57, "learning_rate": 1.0216157468904722e-05, "loss": 0.6196, "step": 6589, "task_loss": 0.5054519176483154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4425010085105896, "epoch": 5.57, "learning_rate": 1.0210119550778892e-05, "loss": 0.4654, "step": 6590, "task_loss": 0.7009885907173157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3301137685775757, "epoch": 5.57, "learning_rate": 1.0204081632653061e-05, "loss": 0.4439, "step": 6591, "task_loss": 0.4593878984451294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.728769063949585, "epoch": 5.57, "learning_rate": 1.0198043714527232e-05, "loss": 0.4932, "step": 6592, "task_loss": 0.666101336479187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47873014211654663, "epoch": 5.57, "learning_rate": 1.01920057964014e-05, "loss": 0.5216, "step": 6593, "task_loss": 0.7723004817962646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3291347622871399, "epoch": 5.57, "learning_rate": 1.0185967878275571e-05, "loss": 0.4809, "step": 6594, "task_loss": 0.596272349357605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3822205662727356, "epoch": 5.57, "learning_rate": 1.017992996014974e-05, "loss": 0.5021, "step": 6595, "task_loss": 0.19459110498428345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3742750883102417, "epoch": 5.58, "learning_rate": 1.017389204202391e-05, "loss": 0.4731, "step": 6596, "task_loss": 0.5387371778488159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33698081970214844, "epoch": 5.58, "learning_rate": 1.0167854123898081e-05, "loss": 0.4758, "step": 6597, "task_loss": 0.16839255392551422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27669286727905273, "epoch": 5.58, "learning_rate": 1.016181620577225e-05, "loss": 0.53, "step": 6598, "task_loss": 0.6592541933059692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3184857666492462, "epoch": 5.58, "learning_rate": 1.0155778287646419e-05, "loss": 0.316, "step": 6599, "task_loss": 0.15906718373298645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2754586637020111, "epoch": 5.58, "learning_rate": 1.014974036952059e-05, "loss": 0.3402, "step": 6600, "task_loss": 0.3642820417881012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6991140842437744, "epoch": 5.58, "learning_rate": 1.014370245139476e-05, "loss": 0.5774, "step": 6601, "task_loss": 1.3737553358078003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4560391902923584, "epoch": 5.58, "learning_rate": 1.0137664533268929e-05, "loss": 0.418, "step": 6602, "task_loss": 0.39918628334999084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8788506984710693, "epoch": 5.58, "learning_rate": 1.0131626615143098e-05, "loss": 0.5149, "step": 6603, "task_loss": 1.6970852613449097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21642425656318665, "epoch": 5.58, "learning_rate": 1.0125588697017269e-05, "loss": 0.4089, "step": 6604, "task_loss": 0.41126856207847595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8159054517745972, "epoch": 5.58, "learning_rate": 1.0119550778891439e-05, "loss": 0.5059, "step": 6605, "task_loss": 1.5232696533203125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23980490863323212, "epoch": 5.58, "learning_rate": 1.0113512860765608e-05, "loss": 0.3965, "step": 6606, "task_loss": 0.1999807357788086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46025875210762024, "epoch": 5.58, "learning_rate": 1.0107474942639779e-05, "loss": 0.4279, "step": 6607, "task_loss": 0.39939025044441223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2726353406906128, "epoch": 5.59, "learning_rate": 1.0101437024513947e-05, "loss": 0.5294, "step": 6608, "task_loss": 0.1501113921403885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48617076873779297, "epoch": 5.59, "learning_rate": 1.0095399106388118e-05, "loss": 0.5543, "step": 6609, "task_loss": 1.4043292999267578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49170196056365967, "epoch": 5.59, "learning_rate": 1.0089361188262287e-05, "loss": 0.485, "step": 6610, "task_loss": 0.8918325304985046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.14193212985992432, "epoch": 5.59, "learning_rate": 1.0083323270136457e-05, "loss": 0.3734, "step": 6611, "task_loss": 2.095724582672119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40398669242858887, "epoch": 5.59, "learning_rate": 1.0077285352010628e-05, "loss": 0.45, "step": 6612, "task_loss": 0.3939119279384613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2947441637516022, "epoch": 5.59, "learning_rate": 1.0071247433884797e-05, "loss": 0.311, "step": 6613, "task_loss": 0.6469756364822388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4840250015258789, "epoch": 5.59, "learning_rate": 1.0065209515758966e-05, "loss": 0.4771, "step": 6614, "task_loss": 0.6271854639053345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33416247367858887, "epoch": 5.59, "learning_rate": 1.0059171597633136e-05, "loss": 0.4181, "step": 6615, "task_loss": 0.9095419645309448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24343633651733398, "epoch": 5.59, "learning_rate": 1.0053133679507307e-05, "loss": 0.3446, "step": 6616, "task_loss": 0.9406329393386841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23963600397109985, "epoch": 5.59, "learning_rate": 1.0047095761381477e-05, "loss": 0.3919, "step": 6617, "task_loss": 0.16745439171791077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37312281131744385, "epoch": 5.59, "learning_rate": 1.0041057843255645e-05, "loss": 0.3665, "step": 6618, "task_loss": 0.1863144040107727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.433851957321167, "epoch": 5.59, "learning_rate": 1.0035019925129815e-05, "loss": 0.3783, "step": 6619, "task_loss": 1.056527853012085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4294077754020691, "epoch": 5.6, "learning_rate": 1.0028982007003986e-05, "loss": 0.3986, "step": 6620, "task_loss": 0.5657487511634827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5478466153144836, "epoch": 5.6, "learning_rate": 1.0022944088878156e-05, "loss": 0.5003, "step": 6621, "task_loss": 0.29477477073669434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4397248327732086, "epoch": 5.6, "learning_rate": 1.0016906170752325e-05, "loss": 0.4485, "step": 6622, "task_loss": 1.2294903993606567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5015079975128174, "epoch": 5.6, "learning_rate": 1.0010868252626494e-05, "loss": 0.408, "step": 6623, "task_loss": 0.9752113223075867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6867103576660156, "epoch": 5.6, "learning_rate": 1.0004830334500665e-05, "loss": 0.5454, "step": 6624, "task_loss": 1.0395159721374512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2731471061706543, "epoch": 5.6, "learning_rate": 9.998792416374835e-06, "loss": 0.5503, "step": 6625, "task_loss": 0.033146001398563385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37536829710006714, "epoch": 5.6, "learning_rate": 9.992754498249004e-06, "loss": 0.3884, "step": 6626, "task_loss": 0.5605231523513794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40159839391708374, "epoch": 5.6, "learning_rate": 9.986716580123175e-06, "loss": 0.3766, "step": 6627, "task_loss": 1.0829672813415527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2612251043319702, "epoch": 5.6, "learning_rate": 9.980678661997344e-06, "loss": 0.5002, "step": 6628, "task_loss": 0.23830151557922363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7409428358078003, "epoch": 5.6, "learning_rate": 9.974640743871514e-06, "loss": 0.5063, "step": 6629, "task_loss": 0.5875707268714905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2884894609451294, "epoch": 5.6, "learning_rate": 9.968602825745683e-06, "loss": 0.3876, "step": 6630, "task_loss": 0.34800511598587036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2978453040122986, "epoch": 5.6, "learning_rate": 9.962564907619854e-06, "loss": 0.4055, "step": 6631, "task_loss": 1.5410786867141724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5237534642219543, "epoch": 5.61, "learning_rate": 9.956526989494024e-06, "loss": 0.4129, "step": 6632, "task_loss": 0.3374232351779938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27444136142730713, "epoch": 5.61, "learning_rate": 9.950489071368193e-06, "loss": 0.5519, "step": 6633, "task_loss": 0.6435171365737915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36407366394996643, "epoch": 5.61, "learning_rate": 9.944451153242362e-06, "loss": 0.539, "step": 6634, "task_loss": 0.4501280188560486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2637448310852051, "epoch": 5.61, "learning_rate": 9.938413235116533e-06, "loss": 0.4482, "step": 6635, "task_loss": 0.0802384540438652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7030282020568848, "epoch": 5.61, "learning_rate": 9.932375316990703e-06, "loss": 0.5904, "step": 6636, "task_loss": 0.9231976270675659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36062759160995483, "epoch": 5.61, "learning_rate": 9.926337398864872e-06, "loss": 0.3176, "step": 6637, "task_loss": 0.15527167916297913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39053842425346375, "epoch": 5.61, "learning_rate": 9.92029948073904e-06, "loss": 0.4494, "step": 6638, "task_loss": 0.7166573405265808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41911593079566956, "epoch": 5.61, "learning_rate": 9.914261562613211e-06, "loss": 0.49, "step": 6639, "task_loss": 0.03881998732686043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33602818846702576, "epoch": 5.61, "learning_rate": 9.908223644487382e-06, "loss": 0.4675, "step": 6640, "task_loss": 0.5163300037384033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3858576714992523, "epoch": 5.61, "learning_rate": 9.902185726361551e-06, "loss": 0.3959, "step": 6641, "task_loss": 0.6001129746437073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21975365281105042, "epoch": 5.61, "learning_rate": 9.896147808235721e-06, "loss": 0.4421, "step": 6642, "task_loss": 0.24245241284370422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23400837182998657, "epoch": 5.61, "learning_rate": 9.89010989010989e-06, "loss": 0.4236, "step": 6643, "task_loss": 0.3544316291809082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29419493675231934, "epoch": 5.62, "learning_rate": 9.884071971984061e-06, "loss": 0.4154, "step": 6644, "task_loss": 0.06820455193519592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.722952127456665, "epoch": 5.62, "learning_rate": 9.87803405385823e-06, "loss": 0.562, "step": 6645, "task_loss": 0.6815069913864136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2777334749698639, "epoch": 5.62, "learning_rate": 9.8719961357324e-06, "loss": 0.3677, "step": 6646, "task_loss": 0.19722190499305725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29160311818122864, "epoch": 5.62, "learning_rate": 9.865958217606571e-06, "loss": 0.3664, "step": 6647, "task_loss": 1.442821979522705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3852737247943878, "epoch": 5.62, "learning_rate": 9.85992029948074e-06, "loss": 0.4305, "step": 6648, "task_loss": 0.22547666728496552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5916048884391785, "epoch": 5.62, "learning_rate": 9.853882381354909e-06, "loss": 0.3556, "step": 6649, "task_loss": 0.41200241446495056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46638646721839905, "epoch": 5.62, "learning_rate": 9.84784446322908e-06, "loss": 0.4475, "step": 6650, "task_loss": 0.833582878112793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3266313076019287, "epoch": 5.62, "learning_rate": 9.84180654510325e-06, "loss": 0.355, "step": 6651, "task_loss": 0.37757450342178345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3368372917175293, "epoch": 5.62, "learning_rate": 9.835768626977419e-06, "loss": 0.3408, "step": 6652, "task_loss": 0.5827327370643616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35288769006729126, "epoch": 5.62, "learning_rate": 9.829730708851588e-06, "loss": 0.4475, "step": 6653, "task_loss": 0.9397169947624207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39362722635269165, "epoch": 5.62, "learning_rate": 9.823692790725758e-06, "loss": 0.4139, "step": 6654, "task_loss": 0.6743589043617249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5709276795387268, "epoch": 5.63, "learning_rate": 9.817654872599929e-06, "loss": 0.5207, "step": 6655, "task_loss": 1.3481749296188354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.65226811170578, "epoch": 5.63, "learning_rate": 9.811616954474098e-06, "loss": 0.3978, "step": 6656, "task_loss": 1.205975890159607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45755618810653687, "epoch": 5.63, "learning_rate": 9.805579036348266e-06, "loss": 0.5541, "step": 6657, "task_loss": 0.5163716673851013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5052016377449036, "epoch": 5.63, "learning_rate": 9.799541118222437e-06, "loss": 0.4759, "step": 6658, "task_loss": 0.38544192910194397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25063127279281616, "epoch": 5.63, "learning_rate": 9.793503200096608e-06, "loss": 0.2814, "step": 6659, "task_loss": 0.16403862833976746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7950208187103271, "epoch": 5.63, "learning_rate": 9.787465281970776e-06, "loss": 0.5465, "step": 6660, "task_loss": 1.6837916374206543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34111353754997253, "epoch": 5.63, "learning_rate": 9.781427363844947e-06, "loss": 0.3591, "step": 6661, "task_loss": 0.6819562315940857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6122892498970032, "epoch": 5.63, "learning_rate": 9.775389445719116e-06, "loss": 0.4821, "step": 6662, "task_loss": 1.3727117776870728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44599127769470215, "epoch": 5.63, "learning_rate": 9.769351527593286e-06, "loss": 0.3798, "step": 6663, "task_loss": 0.7604700922966003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3460988998413086, "epoch": 5.63, "learning_rate": 9.763313609467455e-06, "loss": 0.4216, "step": 6664, "task_loss": 1.0671651363372803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4767155945301056, "epoch": 5.63, "learning_rate": 9.757275691341626e-06, "loss": 0.5642, "step": 6665, "task_loss": 0.7221386432647705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1909727156162262, "epoch": 5.63, "learning_rate": 9.751237773215796e-06, "loss": 0.3822, "step": 6666, "task_loss": 0.5514643788337708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33717846870422363, "epoch": 5.64, "learning_rate": 9.745199855089965e-06, "loss": 0.4513, "step": 6667, "task_loss": 0.625873863697052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34607917070388794, "epoch": 5.64, "learning_rate": 9.739161936964134e-06, "loss": 0.4372, "step": 6668, "task_loss": 0.3548850119113922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3890557289123535, "epoch": 5.64, "learning_rate": 9.733124018838305e-06, "loss": 0.3306, "step": 6669, "task_loss": 0.4444583058357239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4147912859916687, "epoch": 5.64, "learning_rate": 9.727086100712475e-06, "loss": 0.5539, "step": 6670, "task_loss": 1.2830756902694702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3511492609977722, "epoch": 5.64, "learning_rate": 9.721048182586644e-06, "loss": 0.4775, "step": 6671, "task_loss": 0.5181450247764587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6945109367370605, "epoch": 5.64, "learning_rate": 9.715010264460813e-06, "loss": 0.5289, "step": 6672, "task_loss": 0.623112678527832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4633328318595886, "epoch": 5.64, "learning_rate": 9.708972346334984e-06, "loss": 0.4374, "step": 6673, "task_loss": 1.3965741395950317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4342568516731262, "epoch": 5.64, "learning_rate": 9.702934428209154e-06, "loss": 0.584, "step": 6674, "task_loss": 1.0722990036010742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3214791715145111, "epoch": 5.64, "learning_rate": 9.696896510083323e-06, "loss": 0.3816, "step": 6675, "task_loss": 0.23681554198265076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44264864921569824, "epoch": 5.64, "learning_rate": 9.690858591957494e-06, "loss": 0.4181, "step": 6676, "task_loss": 0.8178834319114685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5027637481689453, "epoch": 5.64, "learning_rate": 9.684820673831663e-06, "loss": 0.5647, "step": 6677, "task_loss": 1.0067014694213867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28282684087753296, "epoch": 5.64, "learning_rate": 9.678782755705833e-06, "loss": 0.3083, "step": 6678, "task_loss": 0.23867210745811462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2570165693759918, "epoch": 5.65, "learning_rate": 9.672744837580002e-06, "loss": 0.4248, "step": 6679, "task_loss": 0.11859311908483505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3490985035896301, "epoch": 5.65, "learning_rate": 9.666706919454173e-06, "loss": 0.582, "step": 6680, "task_loss": 0.6593077182769775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4586387574672699, "epoch": 5.65, "learning_rate": 9.660669001328343e-06, "loss": 0.4267, "step": 6681, "task_loss": 0.18626613914966583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29908257722854614, "epoch": 5.65, "learning_rate": 9.654631083202512e-06, "loss": 0.441, "step": 6682, "task_loss": 0.5742673277854919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6133557558059692, "epoch": 5.65, "learning_rate": 9.648593165076681e-06, "loss": 0.5211, "step": 6683, "task_loss": 0.8323172330856323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3714480400085449, "epoch": 5.65, "learning_rate": 9.642555246950852e-06, "loss": 0.4495, "step": 6684, "task_loss": 0.5231508016586304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3637115955352783, "epoch": 5.65, "learning_rate": 9.636517328825022e-06, "loss": 0.3315, "step": 6685, "task_loss": 0.44225868582725525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3607504367828369, "epoch": 5.65, "learning_rate": 9.630479410699193e-06, "loss": 0.4525, "step": 6686, "task_loss": 0.806952178478241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30264222621917725, "epoch": 5.65, "learning_rate": 9.62444149257336e-06, "loss": 0.3679, "step": 6687, "task_loss": 0.3033922016620636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4768761396408081, "epoch": 5.65, "learning_rate": 9.61840357444753e-06, "loss": 0.4562, "step": 6688, "task_loss": 0.74522465467453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5096253156661987, "epoch": 5.65, "learning_rate": 9.612365656321701e-06, "loss": 0.4471, "step": 6689, "task_loss": 0.8947878479957581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33347612619400024, "epoch": 5.65, "learning_rate": 9.606327738195872e-06, "loss": 0.4003, "step": 6690, "task_loss": 0.5078001022338867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5866214036941528, "epoch": 5.66, "learning_rate": 9.60028982007004e-06, "loss": 0.6146, "step": 6691, "task_loss": 1.1787878274917603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3063579201698303, "epoch": 5.66, "learning_rate": 9.59425190194421e-06, "loss": 0.4343, "step": 6692, "task_loss": 1.459625005722046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31216102838516235, "epoch": 5.66, "learning_rate": 9.58821398381838e-06, "loss": 0.4419, "step": 6693, "task_loss": 0.4528191387653351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46292898058891296, "epoch": 5.66, "learning_rate": 9.58217606569255e-06, "loss": 0.4824, "step": 6694, "task_loss": 0.8653815388679504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5531997084617615, "epoch": 5.66, "learning_rate": 9.57613814756672e-06, "loss": 0.7177, "step": 6695, "task_loss": 1.3586716651916504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37402403354644775, "epoch": 5.66, "learning_rate": 9.57010022944089e-06, "loss": 0.3436, "step": 6696, "task_loss": 0.41005295515060425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39014309644699097, "epoch": 5.66, "learning_rate": 9.564062311315059e-06, "loss": 0.3683, "step": 6697, "task_loss": 0.8219631314277649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33870917558670044, "epoch": 5.66, "learning_rate": 9.55802439318923e-06, "loss": 0.4387, "step": 6698, "task_loss": 0.41287946701049805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35838326811790466, "epoch": 5.66, "learning_rate": 9.551986475063398e-06, "loss": 0.3966, "step": 6699, "task_loss": 0.6040765643119812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4094226658344269, "epoch": 5.66, "learning_rate": 9.545948556937569e-06, "loss": 0.514, "step": 6700, "task_loss": 1.1427186727523804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.300124853849411, "epoch": 5.66, "learning_rate": 9.53991063881174e-06, "loss": 0.3639, "step": 6701, "task_loss": 0.6028019189834595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4802337884902954, "epoch": 5.66, "learning_rate": 9.533872720685908e-06, "loss": 0.4124, "step": 6702, "task_loss": 0.9045143127441406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29991692304611206, "epoch": 5.67, "learning_rate": 9.527834802560077e-06, "loss": 0.3858, "step": 6703, "task_loss": 0.5465149283409119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6183332204818726, "epoch": 5.67, "learning_rate": 9.521796884434248e-06, "loss": 0.4742, "step": 6704, "task_loss": 1.1405081748962402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6776962280273438, "epoch": 5.67, "learning_rate": 9.515758966308418e-06, "loss": 0.5264, "step": 6705, "task_loss": 1.1502282619476318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6632376909255981, "epoch": 5.67, "learning_rate": 9.509721048182587e-06, "loss": 0.4702, "step": 6706, "task_loss": 0.25684958696365356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46836820244789124, "epoch": 5.67, "learning_rate": 9.503683130056756e-06, "loss": 0.3853, "step": 6707, "task_loss": 0.7917205691337585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6035056710243225, "epoch": 5.67, "learning_rate": 9.497645211930927e-06, "loss": 0.6128, "step": 6708, "task_loss": 1.3602275848388672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26016175746917725, "epoch": 5.67, "learning_rate": 9.491607293805097e-06, "loss": 0.3558, "step": 6709, "task_loss": 0.13024893403053284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36990880966186523, "epoch": 5.67, "learning_rate": 9.485569375679266e-06, "loss": 0.5751, "step": 6710, "task_loss": 0.6088328957557678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4181291460990906, "epoch": 5.67, "learning_rate": 9.479531457553437e-06, "loss": 0.4669, "step": 6711, "task_loss": 0.36415067315101624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3294379413127899, "epoch": 5.67, "learning_rate": 9.473493539427606e-06, "loss": 0.3633, "step": 6712, "task_loss": 0.5303905606269836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1962207853794098, "epoch": 5.67, "learning_rate": 9.467455621301776e-06, "loss": 0.3864, "step": 6713, "task_loss": 0.2849324345588684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.447213351726532, "epoch": 5.67, "learning_rate": 9.461417703175945e-06, "loss": 0.3485, "step": 6714, "task_loss": 0.48791980743408203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3434171676635742, "epoch": 5.68, "learning_rate": 9.455379785050116e-06, "loss": 0.4139, "step": 6715, "task_loss": 1.0164415836334229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38467562198638916, "epoch": 5.68, "learning_rate": 9.449341866924286e-06, "loss": 0.3267, "step": 6716, "task_loss": 0.37276050448417664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3801423907279968, "epoch": 5.68, "learning_rate": 9.443303948798455e-06, "loss": 0.4988, "step": 6717, "task_loss": 1.2556339502334595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.59651118516922, "epoch": 5.68, "learning_rate": 9.437266030672624e-06, "loss": 0.4904, "step": 6718, "task_loss": 1.3749816417694092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.336675763130188, "epoch": 5.68, "learning_rate": 9.431228112546794e-06, "loss": 0.4878, "step": 6719, "task_loss": 0.05013589933514595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5830874443054199, "epoch": 5.68, "learning_rate": 9.425190194420965e-06, "loss": 0.5124, "step": 6720, "task_loss": 1.185306191444397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6072978377342224, "epoch": 5.68, "learning_rate": 9.419152276295134e-06, "loss": 0.5109, "step": 6721, "task_loss": 0.5938040614128113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30367282032966614, "epoch": 5.68, "learning_rate": 9.413114358169303e-06, "loss": 0.5102, "step": 6722, "task_loss": 0.22702331840991974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5418579578399658, "epoch": 5.68, "learning_rate": 9.407076440043473e-06, "loss": 0.5899, "step": 6723, "task_loss": 1.3945199251174927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34819936752319336, "epoch": 5.68, "learning_rate": 9.401038521917644e-06, "loss": 0.5203, "step": 6724, "task_loss": 0.5698555111885071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35385438799858093, "epoch": 5.68, "learning_rate": 9.395000603791813e-06, "loss": 0.4758, "step": 6725, "task_loss": 0.381542444229126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42342662811279297, "epoch": 5.69, "learning_rate": 9.388962685665983e-06, "loss": 0.4329, "step": 6726, "task_loss": 0.5486181378364563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.598606288433075, "epoch": 5.69, "learning_rate": 9.382924767540152e-06, "loss": 0.5019, "step": 6727, "task_loss": 0.7312585711479187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5449569821357727, "epoch": 5.69, "learning_rate": 9.376886849414323e-06, "loss": 0.525, "step": 6728, "task_loss": 0.7886682152748108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3742581009864807, "epoch": 5.69, "learning_rate": 9.370848931288492e-06, "loss": 0.4147, "step": 6729, "task_loss": 0.46247342228889465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4146203398704529, "epoch": 5.69, "learning_rate": 9.364811013162662e-06, "loss": 0.3097, "step": 6730, "task_loss": 0.3430410623550415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3924112915992737, "epoch": 5.69, "learning_rate": 9.358773095036833e-06, "loss": 0.5037, "step": 6731, "task_loss": 0.9792919158935547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23460236191749573, "epoch": 5.69, "learning_rate": 9.352735176911002e-06, "loss": 0.464, "step": 6732, "task_loss": 0.9901689887046814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3037591576576233, "epoch": 5.69, "learning_rate": 9.34669725878517e-06, "loss": 0.4725, "step": 6733, "task_loss": 0.8621353507041931 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42176276445388794, "epoch": 5.69, "learning_rate": 9.340659340659341e-06, "loss": 0.4571, "step": 6734, "task_loss": 0.7237581610679626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4091700315475464, "epoch": 5.69, "learning_rate": 9.334621422533512e-06, "loss": 0.4007, "step": 6735, "task_loss": 0.43505626916885376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6379737257957458, "epoch": 5.69, "learning_rate": 9.32858350440768e-06, "loss": 0.7213, "step": 6736, "task_loss": 0.5482211709022522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.345699280500412, "epoch": 5.69, "learning_rate": 9.32254558628185e-06, "loss": 0.3487, "step": 6737, "task_loss": 1.1413778066635132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40469950437545776, "epoch": 5.7, "learning_rate": 9.31650766815602e-06, "loss": 0.5317, "step": 6738, "task_loss": 1.0044339895248413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39531129598617554, "epoch": 5.7, "learning_rate": 9.31046975003019e-06, "loss": 0.3768, "step": 6739, "task_loss": 0.2556597590446472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37428170442581177, "epoch": 5.7, "learning_rate": 9.30443183190436e-06, "loss": 0.4787, "step": 6740, "task_loss": 0.6278769969940186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4386274814605713, "epoch": 5.7, "learning_rate": 9.298393913778528e-06, "loss": 0.4365, "step": 6741, "task_loss": 0.8018932938575745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.319701224565506, "epoch": 5.7, "learning_rate": 9.292355995652699e-06, "loss": 0.4291, "step": 6742, "task_loss": 0.5312966108322144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2922467887401581, "epoch": 5.7, "learning_rate": 9.28631807752687e-06, "loss": 0.4232, "step": 6743, "task_loss": 0.5795371532440186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4109874665737152, "epoch": 5.7, "learning_rate": 9.280280159401038e-06, "loss": 0.3291, "step": 6744, "task_loss": 0.568673312664032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44207483530044556, "epoch": 5.7, "learning_rate": 9.274242241275209e-06, "loss": 0.4103, "step": 6745, "task_loss": 0.29672324657440186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35554665327072144, "epoch": 5.7, "learning_rate": 9.268204323149378e-06, "loss": 0.3378, "step": 6746, "task_loss": 0.48400193452835083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5641986727714539, "epoch": 5.7, "learning_rate": 9.262166405023548e-06, "loss": 0.5711, "step": 6747, "task_loss": 0.37638866901397705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42453595995903015, "epoch": 5.7, "learning_rate": 9.256128486897717e-06, "loss": 0.4423, "step": 6748, "task_loss": 0.28984907269477844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4854713976383209, "epoch": 5.7, "learning_rate": 9.250090568771888e-06, "loss": 0.3674, "step": 6749, "task_loss": 0.7928124666213989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5808979868888855, "epoch": 5.71, "learning_rate": 9.244052650646058e-06, "loss": 0.3477, "step": 6750, "task_loss": 0.40570151805877686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.317829966545105, "epoch": 5.71, "learning_rate": 9.238014732520227e-06, "loss": 0.3822, "step": 6751, "task_loss": 0.3280867040157318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25338926911354065, "epoch": 5.71, "learning_rate": 9.231976814394396e-06, "loss": 0.2955, "step": 6752, "task_loss": 0.9392604231834412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3951855003833771, "epoch": 5.71, "learning_rate": 9.225938896268567e-06, "loss": 0.3871, "step": 6753, "task_loss": 0.5460217595100403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4774053692817688, "epoch": 5.71, "learning_rate": 9.219900978142737e-06, "loss": 0.4375, "step": 6754, "task_loss": 1.0834779739379883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32697242498397827, "epoch": 5.71, "learning_rate": 9.213863060016908e-06, "loss": 0.4048, "step": 6755, "task_loss": 0.6568202376365662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45566046237945557, "epoch": 5.71, "learning_rate": 9.207825141891075e-06, "loss": 0.5246, "step": 6756, "task_loss": 0.24032281339168549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6121571063995361, "epoch": 5.71, "learning_rate": 9.201787223765246e-06, "loss": 0.4573, "step": 6757, "task_loss": 1.489749789237976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5307443141937256, "epoch": 5.71, "learning_rate": 9.195749305639416e-06, "loss": 0.4208, "step": 6758, "task_loss": 1.1334186792373657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46949708461761475, "epoch": 5.71, "learning_rate": 9.189711387513587e-06, "loss": 0.4778, "step": 6759, "task_loss": 0.9315220713615417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43076300621032715, "epoch": 5.71, "learning_rate": 9.183673469387756e-06, "loss": 0.4689, "step": 6760, "task_loss": 0.5402387380599976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3442353308200836, "epoch": 5.71, "learning_rate": 9.177635551261925e-06, "loss": 0.4172, "step": 6761, "task_loss": 0.293009877204895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5514411330223083, "epoch": 5.72, "learning_rate": 9.171597633136095e-06, "loss": 0.4457, "step": 6762, "task_loss": 0.41219499707221985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7351735234260559, "epoch": 5.72, "learning_rate": 9.165559715010266e-06, "loss": 0.496, "step": 6763, "task_loss": 1.4759877920150757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2289533019065857, "epoch": 5.72, "learning_rate": 9.159521796884435e-06, "loss": 0.3983, "step": 6764, "task_loss": 0.9207218289375305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43548262119293213, "epoch": 5.72, "learning_rate": 9.153483878758605e-06, "loss": 0.4992, "step": 6765, "task_loss": 0.33177146315574646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3552061915397644, "epoch": 5.72, "learning_rate": 9.147445960632774e-06, "loss": 0.4658, "step": 6766, "task_loss": 1.2425211668014526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32525435090065, "epoch": 5.72, "learning_rate": 9.141408042506945e-06, "loss": 0.4366, "step": 6767, "task_loss": 0.27826035022735596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26304855942726135, "epoch": 5.72, "learning_rate": 9.135370124381113e-06, "loss": 0.4448, "step": 6768, "task_loss": 0.9599641561508179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4889734387397766, "epoch": 5.72, "learning_rate": 9.129332206255284e-06, "loss": 0.4101, "step": 6769, "task_loss": 1.1959550380706787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24056431651115417, "epoch": 5.72, "learning_rate": 9.123294288129455e-06, "loss": 0.4447, "step": 6770, "task_loss": 0.07262258976697922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5809975862503052, "epoch": 5.72, "learning_rate": 9.117256370003622e-06, "loss": 0.5391, "step": 6771, "task_loss": 0.8092759847640991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5652029514312744, "epoch": 5.72, "learning_rate": 9.111218451877792e-06, "loss": 0.4202, "step": 6772, "task_loss": 0.7727020382881165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37270236015319824, "epoch": 5.72, "learning_rate": 9.105180533751963e-06, "loss": 0.465, "step": 6773, "task_loss": 0.45642298460006714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3966851830482483, "epoch": 5.73, "learning_rate": 9.099142615626133e-06, "loss": 0.5724, "step": 6774, "task_loss": 0.16713391244411469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3225492238998413, "epoch": 5.73, "learning_rate": 9.093104697500302e-06, "loss": 0.3695, "step": 6775, "task_loss": 0.16621845960617065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4366122782230377, "epoch": 5.73, "learning_rate": 9.087066779374471e-06, "loss": 0.4001, "step": 6776, "task_loss": 0.4593633711338043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37281203269958496, "epoch": 5.73, "learning_rate": 9.081028861248642e-06, "loss": 0.4112, "step": 6777, "task_loss": 0.3386434316635132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3283293843269348, "epoch": 5.73, "learning_rate": 9.074990943122812e-06, "loss": 0.3803, "step": 6778, "task_loss": 0.4193355441093445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4778842329978943, "epoch": 5.73, "learning_rate": 9.068953024996981e-06, "loss": 0.4565, "step": 6779, "task_loss": 0.6542305946350098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47622302174568176, "epoch": 5.73, "learning_rate": 9.062915106871152e-06, "loss": 0.4627, "step": 6780, "task_loss": 0.20711469650268555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47977280616760254, "epoch": 5.73, "learning_rate": 9.05687718874532e-06, "loss": 0.5215, "step": 6781, "task_loss": 1.0503017902374268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29045918583869934, "epoch": 5.73, "learning_rate": 9.050839270619491e-06, "loss": 0.4797, "step": 6782, "task_loss": 0.5339498519897461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36565786600112915, "epoch": 5.73, "learning_rate": 9.04480135249366e-06, "loss": 0.424, "step": 6783, "task_loss": 0.8045396208763123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47113776206970215, "epoch": 5.73, "learning_rate": 9.03876343436783e-06, "loss": 0.4401, "step": 6784, "task_loss": 0.48665308952331543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3332441747188568, "epoch": 5.73, "learning_rate": 9.032725516242001e-06, "loss": 0.3482, "step": 6785, "task_loss": 0.5813766121864319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6250608563423157, "epoch": 5.74, "learning_rate": 9.02668759811617e-06, "loss": 0.4354, "step": 6786, "task_loss": 0.7294788360595703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26129233837127686, "epoch": 5.74, "learning_rate": 9.020649679990339e-06, "loss": 0.4388, "step": 6787, "task_loss": 0.4819594919681549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45985138416290283, "epoch": 5.74, "learning_rate": 9.01461176186451e-06, "loss": 0.4855, "step": 6788, "task_loss": 0.851227343082428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.17006535828113556, "epoch": 5.74, "learning_rate": 9.00857384373868e-06, "loss": 0.5305, "step": 6789, "task_loss": 0.11492697149515152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5755500793457031, "epoch": 5.74, "learning_rate": 9.002535925612849e-06, "loss": 0.4784, "step": 6790, "task_loss": 0.6575520038604736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43961772322654724, "epoch": 5.74, "learning_rate": 8.996498007487018e-06, "loss": 0.47, "step": 6791, "task_loss": 0.786239504814148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2823607921600342, "epoch": 5.74, "learning_rate": 8.990460089361189e-06, "loss": 0.5403, "step": 6792, "task_loss": 0.6331005692481995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43732666969299316, "epoch": 5.74, "learning_rate": 8.984422171235359e-06, "loss": 0.4802, "step": 6793, "task_loss": 0.22647225856781006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.680653989315033, "epoch": 5.74, "learning_rate": 8.978384253109528e-06, "loss": 0.4763, "step": 6794, "task_loss": 0.7984157800674438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3626134395599365, "epoch": 5.74, "learning_rate": 8.972346334983699e-06, "loss": 0.435, "step": 6795, "task_loss": 1.2877767086029053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5412952899932861, "epoch": 5.74, "learning_rate": 8.966308416857867e-06, "loss": 0.4673, "step": 6796, "task_loss": 0.7032785415649414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42447182536125183, "epoch": 5.75, "learning_rate": 8.960270498732038e-06, "loss": 0.4847, "step": 6797, "task_loss": 0.34944894909858704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4984859228134155, "epoch": 5.75, "learning_rate": 8.954232580606207e-06, "loss": 0.4616, "step": 6798, "task_loss": 0.4432244598865509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8174580931663513, "epoch": 5.75, "learning_rate": 8.948194662480377e-06, "loss": 0.5702, "step": 6799, "task_loss": 0.366484671831131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21612553298473358, "epoch": 5.75, "learning_rate": 8.942156744354548e-06, "loss": 0.4254, "step": 6800, "task_loss": 0.7730886936187744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5667479634284973, "epoch": 5.75, "learning_rate": 8.936118826228717e-06, "loss": 0.4927, "step": 6801, "task_loss": 1.341439962387085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7919966578483582, "epoch": 5.75, "learning_rate": 8.930080908102886e-06, "loss": 0.4646, "step": 6802, "task_loss": 0.6175392866134644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26226508617401123, "epoch": 5.75, "learning_rate": 8.924042989977056e-06, "loss": 0.5002, "step": 6803, "task_loss": 0.496273934841156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7612067461013794, "epoch": 5.75, "learning_rate": 8.918005071851227e-06, "loss": 0.4662, "step": 6804, "task_loss": 0.5117945075035095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4614354074001312, "epoch": 5.75, "learning_rate": 8.911967153725396e-06, "loss": 0.5291, "step": 6805, "task_loss": 1.041252613067627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37100714445114136, "epoch": 5.75, "learning_rate": 8.905929235599565e-06, "loss": 0.4614, "step": 6806, "task_loss": 1.1394340991973877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3699692487716675, "epoch": 5.75, "learning_rate": 8.899891317473735e-06, "loss": 0.4718, "step": 6807, "task_loss": 0.21843348443508148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4842161536216736, "epoch": 5.75, "learning_rate": 8.893853399347906e-06, "loss": 0.5441, "step": 6808, "task_loss": 1.2731599807739258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3685823678970337, "epoch": 5.76, "learning_rate": 8.887815481222075e-06, "loss": 0.4217, "step": 6809, "task_loss": 0.3689992129802704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45533621311187744, "epoch": 5.76, "learning_rate": 8.881777563096245e-06, "loss": 0.5183, "step": 6810, "task_loss": 0.11185700446367264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5002017617225647, "epoch": 5.76, "learning_rate": 8.875739644970414e-06, "loss": 0.4604, "step": 6811, "task_loss": 0.32641032338142395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3219699561595917, "epoch": 5.76, "learning_rate": 8.869701726844585e-06, "loss": 0.3196, "step": 6812, "task_loss": 0.45952340960502625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25067338347435, "epoch": 5.76, "learning_rate": 8.863663808718754e-06, "loss": 0.4132, "step": 6813, "task_loss": 0.4273223280906677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21826279163360596, "epoch": 5.76, "learning_rate": 8.857625890592924e-06, "loss": 0.2463, "step": 6814, "task_loss": 0.7874842882156372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6446139216423035, "epoch": 5.76, "learning_rate": 8.851587972467095e-06, "loss": 0.5081, "step": 6815, "task_loss": 1.2316144704818726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4131598472595215, "epoch": 5.76, "learning_rate": 8.845550054341264e-06, "loss": 0.6092, "step": 6816, "task_loss": 0.7948697805404663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5868838429450989, "epoch": 5.76, "learning_rate": 8.839512136215432e-06, "loss": 0.5729, "step": 6817, "task_loss": 1.8779619932174683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3616679310798645, "epoch": 5.76, "learning_rate": 8.833474218089603e-06, "loss": 0.4276, "step": 6818, "task_loss": 0.39233511686325073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5627244710922241, "epoch": 5.76, "learning_rate": 8.827436299963774e-06, "loss": 0.4294, "step": 6819, "task_loss": 0.9320204854011536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5246750116348267, "epoch": 5.76, "learning_rate": 8.821398381837944e-06, "loss": 0.4661, "step": 6820, "task_loss": 0.41106876730918884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.17428657412528992, "epoch": 5.77, "learning_rate": 8.815360463712111e-06, "loss": 0.3551, "step": 6821, "task_loss": 0.3434470295906067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1982969343662262, "epoch": 5.77, "learning_rate": 8.809322545586282e-06, "loss": 0.4446, "step": 6822, "task_loss": 0.1066550761461258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3522729277610779, "epoch": 5.77, "learning_rate": 8.803284627460452e-06, "loss": 0.565, "step": 6823, "task_loss": 0.7793720364570618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6030377149581909, "epoch": 5.77, "learning_rate": 8.797246709334623e-06, "loss": 0.4068, "step": 6824, "task_loss": 0.13307474553585052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6200074553489685, "epoch": 5.77, "learning_rate": 8.791208791208792e-06, "loss": 0.404, "step": 6825, "task_loss": 1.0221736431121826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46882468461990356, "epoch": 5.77, "learning_rate": 8.78517087308296e-06, "loss": 0.4568, "step": 6826, "task_loss": 0.7106450796127319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37567901611328125, "epoch": 5.77, "learning_rate": 8.779132954957131e-06, "loss": 0.4523, "step": 6827, "task_loss": 0.24244384467601776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9290674924850464, "epoch": 5.77, "learning_rate": 8.773095036831302e-06, "loss": 0.59, "step": 6828, "task_loss": 1.031914234161377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36427628993988037, "epoch": 5.77, "learning_rate": 8.76705711870547e-06, "loss": 0.456, "step": 6829, "task_loss": 0.2579890787601471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2719225287437439, "epoch": 5.77, "learning_rate": 8.76101920057964e-06, "loss": 0.4575, "step": 6830, "task_loss": 0.061803411692380905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.344133198261261, "epoch": 5.77, "learning_rate": 8.75498128245381e-06, "loss": 0.5546, "step": 6831, "task_loss": 0.3911244571208954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8373414278030396, "epoch": 5.77, "learning_rate": 8.74894336432798e-06, "loss": 0.5159, "step": 6832, "task_loss": 0.920238196849823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2709845304489136, "epoch": 5.78, "learning_rate": 8.74290544620215e-06, "loss": 0.5485, "step": 6833, "task_loss": 1.269283413887024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4290524125099182, "epoch": 5.78, "learning_rate": 8.73686752807632e-06, "loss": 0.4162, "step": 6834, "task_loss": 0.7809634804725647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41308027505874634, "epoch": 5.78, "learning_rate": 8.73082960995049e-06, "loss": 0.5419, "step": 6835, "task_loss": 0.9451996684074402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34122925996780396, "epoch": 5.78, "learning_rate": 8.724791691824658e-06, "loss": 0.4626, "step": 6836, "task_loss": 0.22515572607517242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3117615282535553, "epoch": 5.78, "learning_rate": 8.718753773698829e-06, "loss": 0.3745, "step": 6837, "task_loss": 0.4015805125236511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47680628299713135, "epoch": 5.78, "learning_rate": 8.712715855573e-06, "loss": 0.5481, "step": 6838, "task_loss": 0.6055381894111633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3258705139160156, "epoch": 5.78, "learning_rate": 8.70667793744717e-06, "loss": 0.3998, "step": 6839, "task_loss": 1.194960117340088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6732715368270874, "epoch": 5.78, "learning_rate": 8.700640019321337e-06, "loss": 0.5333, "step": 6840, "task_loss": 1.1332221031188965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37896040081977844, "epoch": 5.78, "learning_rate": 8.694602101195508e-06, "loss": 0.3891, "step": 6841, "task_loss": 0.7380682229995728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21282166242599487, "epoch": 5.78, "learning_rate": 8.688564183069678e-06, "loss": 0.4203, "step": 6842, "task_loss": 0.21935594081878662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3921148180961609, "epoch": 5.78, "learning_rate": 8.682526264943849e-06, "loss": 0.3731, "step": 6843, "task_loss": 0.18771642446517944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6807649731636047, "epoch": 5.78, "learning_rate": 8.676488346818018e-06, "loss": 0.476, "step": 6844, "task_loss": 1.7016767263412476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2680549621582031, "epoch": 5.79, "learning_rate": 8.670450428692186e-06, "loss": 0.3536, "step": 6845, "task_loss": 0.39849528670310974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4472857117652893, "epoch": 5.79, "learning_rate": 8.664412510566357e-06, "loss": 0.4865, "step": 6846, "task_loss": 0.3608029782772064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3966866135597229, "epoch": 5.79, "learning_rate": 8.658374592440528e-06, "loss": 0.4353, "step": 6847, "task_loss": 0.16099335253238678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3421272933483124, "epoch": 5.79, "learning_rate": 8.652336674314696e-06, "loss": 0.4863, "step": 6848, "task_loss": 0.5604204535484314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31521034240722656, "epoch": 5.79, "learning_rate": 8.646298756188867e-06, "loss": 0.3654, "step": 6849, "task_loss": 1.3463664054870605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.289031982421875, "epoch": 5.79, "learning_rate": 8.640260838063036e-06, "loss": 0.369, "step": 6850, "task_loss": 0.21315786242485046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3626205325126648, "epoch": 5.79, "learning_rate": 8.634222919937206e-06, "loss": 0.358, "step": 6851, "task_loss": 0.3071889281272888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23373094201087952, "epoch": 5.79, "learning_rate": 8.628185001811375e-06, "loss": 0.3825, "step": 6852, "task_loss": 0.38108542561531067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3998470902442932, "epoch": 5.79, "learning_rate": 8.622147083685546e-06, "loss": 0.4349, "step": 6853, "task_loss": 1.0409437417984009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4545266330242157, "epoch": 5.79, "learning_rate": 8.616109165559716e-06, "loss": 0.5326, "step": 6854, "task_loss": 0.8539249897003174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4730611741542816, "epoch": 5.79, "learning_rate": 8.610071247433885e-06, "loss": 0.4485, "step": 6855, "task_loss": 0.7993516325950623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5145604014396667, "epoch": 5.79, "learning_rate": 8.604033329308054e-06, "loss": 0.3723, "step": 6856, "task_loss": 0.6746289730072021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47001469135284424, "epoch": 5.8, "learning_rate": 8.597995411182225e-06, "loss": 0.484, "step": 6857, "task_loss": 0.7576054930686951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5502407550811768, "epoch": 5.8, "learning_rate": 8.591957493056395e-06, "loss": 0.5648, "step": 6858, "task_loss": 0.867617666721344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.237340047955513, "epoch": 5.8, "learning_rate": 8.585919574930564e-06, "loss": 0.3823, "step": 6859, "task_loss": 0.8760807514190674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5560070872306824, "epoch": 5.8, "learning_rate": 8.579881656804733e-06, "loss": 0.4407, "step": 6860, "task_loss": 0.9598127603530884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2689327895641327, "epoch": 5.8, "learning_rate": 8.573843738678904e-06, "loss": 0.3381, "step": 6861, "task_loss": 0.6622861623764038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.526469886302948, "epoch": 5.8, "learning_rate": 8.567805820553074e-06, "loss": 0.4803, "step": 6862, "task_loss": 1.2142293453216553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34447401762008667, "epoch": 5.8, "learning_rate": 8.561767902427243e-06, "loss": 0.3621, "step": 6863, "task_loss": 1.0713777542114258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4919794499874115, "epoch": 5.8, "learning_rate": 8.555729984301414e-06, "loss": 0.425, "step": 6864, "task_loss": 0.9440869092941284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.11292891204357147, "epoch": 5.8, "learning_rate": 8.549692066175583e-06, "loss": 0.366, "step": 6865, "task_loss": 0.010360435582697392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5620462894439697, "epoch": 5.8, "learning_rate": 8.543654148049753e-06, "loss": 0.3998, "step": 6866, "task_loss": 0.759792685508728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29607874155044556, "epoch": 5.8, "learning_rate": 8.537616229923922e-06, "loss": 0.5225, "step": 6867, "task_loss": 0.45389482378959656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5657116174697876, "epoch": 5.81, "learning_rate": 8.531578311798093e-06, "loss": 0.4715, "step": 6868, "task_loss": 1.5670477151870728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4624946713447571, "epoch": 5.81, "learning_rate": 8.525540393672263e-06, "loss": 0.49, "step": 6869, "task_loss": 0.7926468849182129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4558680057525635, "epoch": 5.81, "learning_rate": 8.519502475546432e-06, "loss": 0.4301, "step": 6870, "task_loss": 0.7351441383361816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3619242310523987, "epoch": 5.81, "learning_rate": 8.513464557420601e-06, "loss": 0.4739, "step": 6871, "task_loss": 0.4874288737773895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34402012825012207, "epoch": 5.81, "learning_rate": 8.507426639294772e-06, "loss": 0.4631, "step": 6872, "task_loss": 1.2328965663909912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4837105870246887, "epoch": 5.81, "learning_rate": 8.501388721168942e-06, "loss": 0.4849, "step": 6873, "task_loss": 0.40857362747192383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31597450375556946, "epoch": 5.81, "learning_rate": 8.495350803043111e-06, "loss": 0.317, "step": 6874, "task_loss": 0.38574692606925964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31182849407196045, "epoch": 5.81, "learning_rate": 8.48931288491728e-06, "loss": 0.4668, "step": 6875, "task_loss": 0.3975023627281189 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5948579907417297, "epoch": 5.81, "learning_rate": 8.48327496679145e-06, "loss": 0.4358, "step": 6876, "task_loss": 0.3158024251461029 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3936498761177063, "epoch": 5.81, "learning_rate": 8.477237048665621e-06, "loss": 0.3592, "step": 6877, "task_loss": 0.43681928515434265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42685794830322266, "epoch": 5.81, "learning_rate": 8.47119913053979e-06, "loss": 0.4931, "step": 6878, "task_loss": 0.5872896313667297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4572516679763794, "epoch": 5.81, "learning_rate": 8.46516121241396e-06, "loss": 0.4981, "step": 6879, "task_loss": 1.0090363025665283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39667075872421265, "epoch": 5.82, "learning_rate": 8.45912329428813e-06, "loss": 0.4918, "step": 6880, "task_loss": 0.05588139221072197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3507067859172821, "epoch": 5.82, "learning_rate": 8.4530853761623e-06, "loss": 0.4531, "step": 6881, "task_loss": 0.9461204409599304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41787266731262207, "epoch": 5.82, "learning_rate": 8.447047458036469e-06, "loss": 0.4707, "step": 6882, "task_loss": 0.8120055794715881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3385689854621887, "epoch": 5.82, "learning_rate": 8.44100953991064e-06, "loss": 0.3802, "step": 6883, "task_loss": 0.585078775882721 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3481976389884949, "epoch": 5.82, "learning_rate": 8.43497162178481e-06, "loss": 0.2832, "step": 6884, "task_loss": 0.6310349106788635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2829192876815796, "epoch": 5.82, "learning_rate": 8.428933703658979e-06, "loss": 0.4389, "step": 6885, "task_loss": 0.2057802379131317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38993045687675476, "epoch": 5.82, "learning_rate": 8.422895785533148e-06, "loss": 0.5308, "step": 6886, "task_loss": 0.7504974007606506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47812265157699585, "epoch": 5.82, "learning_rate": 8.416857867407318e-06, "loss": 0.4516, "step": 6887, "task_loss": 0.9430537819862366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36125704646110535, "epoch": 5.82, "learning_rate": 8.410819949281489e-06, "loss": 0.3086, "step": 6888, "task_loss": 0.40589481592178345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6667785048484802, "epoch": 5.82, "learning_rate": 8.40478203115566e-06, "loss": 0.4718, "step": 6889, "task_loss": 0.5275201797485352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6534362435340881, "epoch": 5.82, "learning_rate": 8.398744113029827e-06, "loss": 0.5837, "step": 6890, "task_loss": 0.856420636177063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3661806881427765, "epoch": 5.82, "learning_rate": 8.392706194903997e-06, "loss": 0.4569, "step": 6891, "task_loss": 0.4163728654384613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2615051865577698, "epoch": 5.83, "learning_rate": 8.386668276778168e-06, "loss": 0.4235, "step": 6892, "task_loss": 0.01779782585799694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.307868629693985, "epoch": 5.83, "learning_rate": 8.380630358652338e-06, "loss": 0.4214, "step": 6893, "task_loss": 0.17824146151542664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2924744486808777, "epoch": 5.83, "learning_rate": 8.374592440526507e-06, "loss": 0.4645, "step": 6894, "task_loss": 1.3725001811981201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2637721300125122, "epoch": 5.83, "learning_rate": 8.368554522400676e-06, "loss": 0.3707, "step": 6895, "task_loss": 0.7852872014045715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28147104382514954, "epoch": 5.83, "learning_rate": 8.362516604274847e-06, "loss": 0.6144, "step": 6896, "task_loss": 0.68956059217453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3505222797393799, "epoch": 5.83, "learning_rate": 8.356478686149015e-06, "loss": 0.5321, "step": 6897, "task_loss": 0.730493426322937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45047926902770996, "epoch": 5.83, "learning_rate": 8.350440768023186e-06, "loss": 0.4641, "step": 6898, "task_loss": 0.8506103157997131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21070575714111328, "epoch": 5.83, "learning_rate": 8.344402849897357e-06, "loss": 0.351, "step": 6899, "task_loss": 0.04980315640568733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6946448683738708, "epoch": 5.83, "learning_rate": 8.338364931771525e-06, "loss": 0.4405, "step": 6900, "task_loss": 1.2568155527114868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28478628396987915, "epoch": 5.83, "learning_rate": 8.332327013645694e-06, "loss": 0.5711, "step": 6901, "task_loss": 0.5820723176002502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5324742794036865, "epoch": 5.83, "learning_rate": 8.326289095519865e-06, "loss": 0.4903, "step": 6902, "task_loss": 0.49629703164100647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34382620453834534, "epoch": 5.83, "learning_rate": 8.320251177394036e-06, "loss": 0.4026, "step": 6903, "task_loss": 1.1378055810928345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34479331970214844, "epoch": 5.84, "learning_rate": 8.314213259268206e-06, "loss": 0.3242, "step": 6904, "task_loss": 0.8123185634613037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22207149863243103, "epoch": 5.84, "learning_rate": 8.308175341142373e-06, "loss": 0.373, "step": 6905, "task_loss": 0.20571757853031158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2800230085849762, "epoch": 5.84, "learning_rate": 8.302137423016544e-06, "loss": 0.411, "step": 6906, "task_loss": 0.383259654045105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2858198583126068, "epoch": 5.84, "learning_rate": 8.296099504890714e-06, "loss": 0.4757, "step": 6907, "task_loss": 0.047231972217559814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43270570039749146, "epoch": 5.84, "learning_rate": 8.290061586764885e-06, "loss": 0.4351, "step": 6908, "task_loss": 0.49198442697525024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3998495936393738, "epoch": 5.84, "learning_rate": 8.284023668639054e-06, "loss": 0.4562, "step": 6909, "task_loss": 0.6213709712028503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4395737648010254, "epoch": 5.84, "learning_rate": 8.277985750513223e-06, "loss": 0.4554, "step": 6910, "task_loss": 0.7981939911842346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39473816752433777, "epoch": 5.84, "learning_rate": 8.271947832387393e-06, "loss": 0.4472, "step": 6911, "task_loss": 0.5105046629905701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6238789558410645, "epoch": 5.84, "learning_rate": 8.265909914261564e-06, "loss": 0.613, "step": 6912, "task_loss": 1.0496739149093628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4039442837238312, "epoch": 5.84, "learning_rate": 8.259871996135733e-06, "loss": 0.4631, "step": 6913, "task_loss": 1.0193341970443726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48370087146759033, "epoch": 5.84, "learning_rate": 8.253834078009903e-06, "loss": 0.3788, "step": 6914, "task_loss": 0.46260449290275574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29194003343582153, "epoch": 5.84, "learning_rate": 8.247796159884072e-06, "loss": 0.3284, "step": 6915, "task_loss": 0.6137953996658325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41501322388648987, "epoch": 5.85, "learning_rate": 8.241758241758243e-06, "loss": 0.3267, "step": 6916, "task_loss": 0.3210653066635132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3291185796260834, "epoch": 5.85, "learning_rate": 8.235720323632412e-06, "loss": 0.3794, "step": 6917, "task_loss": 0.24007408320903778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30583199858665466, "epoch": 5.85, "learning_rate": 8.229682405506582e-06, "loss": 0.4545, "step": 6918, "task_loss": 0.649651825428009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5069676637649536, "epoch": 5.85, "learning_rate": 8.223644487380751e-06, "loss": 0.5291, "step": 6919, "task_loss": 0.5986087322235107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4521918296813965, "epoch": 5.85, "learning_rate": 8.217606569254922e-06, "loss": 0.3647, "step": 6920, "task_loss": 1.0398566722869873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48589885234832764, "epoch": 5.85, "learning_rate": 8.21156865112909e-06, "loss": 0.4215, "step": 6921, "task_loss": 0.5031166076660156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3149758577346802, "epoch": 5.85, "learning_rate": 8.205530733003261e-06, "loss": 0.3585, "step": 6922, "task_loss": 0.7229981422424316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5669782161712646, "epoch": 5.85, "learning_rate": 8.199492814877432e-06, "loss": 0.4234, "step": 6923, "task_loss": 0.703351616859436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3199085593223572, "epoch": 5.85, "learning_rate": 8.1934548967516e-06, "loss": 0.3994, "step": 6924, "task_loss": 0.3237999677658081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4833778142929077, "epoch": 5.85, "learning_rate": 8.18741697862577e-06, "loss": 0.6103, "step": 6925, "task_loss": 0.32273244857788086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31481650471687317, "epoch": 5.85, "learning_rate": 8.18137906049994e-06, "loss": 0.4179, "step": 6926, "task_loss": 0.3099425435066223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6069097518920898, "epoch": 5.85, "learning_rate": 8.17534114237411e-06, "loss": 0.3595, "step": 6927, "task_loss": 0.3710533678531647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44832298159599304, "epoch": 5.86, "learning_rate": 8.16930322424828e-06, "loss": 0.4228, "step": 6928, "task_loss": 0.6668354868888855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3236726224422455, "epoch": 5.86, "learning_rate": 8.163265306122448e-06, "loss": 0.4571, "step": 6929, "task_loss": 0.49051401019096375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4685375392436981, "epoch": 5.86, "learning_rate": 8.157227387996619e-06, "loss": 0.4664, "step": 6930, "task_loss": 0.18978945910930634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5885454416275024, "epoch": 5.86, "learning_rate": 8.15118946987079e-06, "loss": 0.5502, "step": 6931, "task_loss": 1.0149588584899902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3576107621192932, "epoch": 5.86, "learning_rate": 8.145151551744958e-06, "loss": 0.5249, "step": 6932, "task_loss": 1.1126909255981445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4122850298881531, "epoch": 5.86, "learning_rate": 8.139113633619129e-06, "loss": 0.4226, "step": 6933, "task_loss": 0.3238452970981598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4157611131668091, "epoch": 5.86, "learning_rate": 8.133075715493298e-06, "loss": 0.5139, "step": 6934, "task_loss": 0.25755226612091064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5746082067489624, "epoch": 5.86, "learning_rate": 8.127037797367468e-06, "loss": 0.3502, "step": 6935, "task_loss": 1.0045945644378662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.681090772151947, "epoch": 5.86, "learning_rate": 8.120999879241637e-06, "loss": 0.4626, "step": 6936, "task_loss": 0.7141767740249634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3222769498825073, "epoch": 5.86, "learning_rate": 8.114961961115808e-06, "loss": 0.3979, "step": 6937, "task_loss": 0.4712897837162018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31626230478286743, "epoch": 5.86, "learning_rate": 8.108924042989978e-06, "loss": 0.4332, "step": 6938, "task_loss": 0.0822918638586998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29413169622421265, "epoch": 5.87, "learning_rate": 8.102886124864147e-06, "loss": 0.3332, "step": 6939, "task_loss": 0.5898066759109497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4244046211242676, "epoch": 5.87, "learning_rate": 8.096848206738316e-06, "loss": 0.39, "step": 6940, "task_loss": 0.31856802105903625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3980801999568939, "epoch": 5.87, "learning_rate": 8.090810288612487e-06, "loss": 0.4856, "step": 6941, "task_loss": 0.845194399356842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35983023047447205, "epoch": 5.87, "learning_rate": 8.084772370486657e-06, "loss": 0.5219, "step": 6942, "task_loss": 0.12037420272827148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44201192259788513, "epoch": 5.87, "learning_rate": 8.078734452360826e-06, "loss": 0.632, "step": 6943, "task_loss": 0.3167547285556793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3107828199863434, "epoch": 5.87, "learning_rate": 8.072696534234995e-06, "loss": 0.4113, "step": 6944, "task_loss": 0.7686514854431152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2514476180076599, "epoch": 5.87, "learning_rate": 8.066658616109166e-06, "loss": 0.4024, "step": 6945, "task_loss": 0.7811295390129089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22642725706100464, "epoch": 5.87, "learning_rate": 8.060620697983336e-06, "loss": 0.4253, "step": 6946, "task_loss": 0.45459866523742676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37283703684806824, "epoch": 5.87, "learning_rate": 8.054582779857505e-06, "loss": 0.3389, "step": 6947, "task_loss": 0.5623932480812073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6369295716285706, "epoch": 5.87, "learning_rate": 8.048544861731676e-06, "loss": 0.4503, "step": 6948, "task_loss": 1.0879145860671997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22271110117435455, "epoch": 5.87, "learning_rate": 8.042506943605845e-06, "loss": 0.3224, "step": 6949, "task_loss": 0.2093796730041504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4541492760181427, "epoch": 5.87, "learning_rate": 8.036469025480015e-06, "loss": 0.4003, "step": 6950, "task_loss": 0.9150739908218384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6458548903465271, "epoch": 5.88, "learning_rate": 8.030431107354184e-06, "loss": 0.5135, "step": 6951, "task_loss": 1.5363305807113647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3882969617843628, "epoch": 5.88, "learning_rate": 8.024393189228355e-06, "loss": 0.5052, "step": 6952, "task_loss": 0.419509619474411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5319870710372925, "epoch": 5.88, "learning_rate": 8.018355271102525e-06, "loss": 0.3907, "step": 6953, "task_loss": 1.2335803508758545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.20668920874595642, "epoch": 5.88, "learning_rate": 8.012317352976694e-06, "loss": 0.4689, "step": 6954, "task_loss": 0.02250358648598194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3774451017379761, "epoch": 5.88, "learning_rate": 8.006279434850863e-06, "loss": 0.3553, "step": 6955, "task_loss": 0.6751906275749207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31259045004844666, "epoch": 5.88, "learning_rate": 8.000241516725033e-06, "loss": 0.4625, "step": 6956, "task_loss": 0.5203465819358826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5578932166099548, "epoch": 5.88, "learning_rate": 7.994203598599204e-06, "loss": 0.5615, "step": 6957, "task_loss": 0.7862531542778015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3468993902206421, "epoch": 5.88, "learning_rate": 7.988165680473373e-06, "loss": 0.4295, "step": 6958, "task_loss": 0.5802791118621826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43521279096603394, "epoch": 5.88, "learning_rate": 7.982127762347542e-06, "loss": 0.4418, "step": 6959, "task_loss": 0.6512595415115356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31035059690475464, "epoch": 5.88, "learning_rate": 7.976089844221712e-06, "loss": 0.4464, "step": 6960, "task_loss": 1.3289785385131836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5049430727958679, "epoch": 5.88, "learning_rate": 7.970051926095883e-06, "loss": 0.4104, "step": 6961, "task_loss": 0.5964776873588562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31972306966781616, "epoch": 5.88, "learning_rate": 7.964014007970052e-06, "loss": 0.4856, "step": 6962, "task_loss": 0.7360772490501404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3082845211029053, "epoch": 5.89, "learning_rate": 7.957976089844222e-06, "loss": 0.4448, "step": 6963, "task_loss": 0.47658249735832214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1652650535106659, "epoch": 5.89, "learning_rate": 7.951938171718391e-06, "loss": 0.4013, "step": 6964, "task_loss": 0.12744301557540894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.384038507938385, "epoch": 5.89, "learning_rate": 7.945900253592562e-06, "loss": 0.5222, "step": 6965, "task_loss": 1.5211560726165771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2479127049446106, "epoch": 5.89, "learning_rate": 7.93986233546673e-06, "loss": 0.451, "step": 6966, "task_loss": 0.10588888823986053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43977290391921997, "epoch": 5.89, "learning_rate": 7.933824417340901e-06, "loss": 0.4231, "step": 6967, "task_loss": 0.7373864650726318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22163361310958862, "epoch": 5.89, "learning_rate": 7.927786499215072e-06, "loss": 0.4501, "step": 6968, "task_loss": 0.41722995042800903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4625582695007324, "epoch": 5.89, "learning_rate": 7.92174858108924e-06, "loss": 0.4491, "step": 6969, "task_loss": 1.1094281673431396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.15898041427135468, "epoch": 5.89, "learning_rate": 7.91571066296341e-06, "loss": 0.2682, "step": 6970, "task_loss": 0.13250593841075897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.452730655670166, "epoch": 5.89, "learning_rate": 7.90967274483758e-06, "loss": 0.4133, "step": 6971, "task_loss": 0.25173208117485046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4305339753627777, "epoch": 5.89, "learning_rate": 7.90363482671175e-06, "loss": 0.4637, "step": 6972, "task_loss": 1.0811203718185425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7033300399780273, "epoch": 5.89, "learning_rate": 7.897596908585921e-06, "loss": 0.5198, "step": 6973, "task_loss": 0.6781212687492371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6664816737174988, "epoch": 5.89, "learning_rate": 7.891558990460088e-06, "loss": 0.5443, "step": 6974, "task_loss": 1.5009799003601074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46847623586654663, "epoch": 5.9, "learning_rate": 7.885521072334259e-06, "loss": 0.4898, "step": 6975, "task_loss": 0.7659034132957458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4427796006202698, "epoch": 5.9, "learning_rate": 7.87948315420843e-06, "loss": 0.5193, "step": 6976, "task_loss": 0.6218733191490173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4531594514846802, "epoch": 5.9, "learning_rate": 7.8734452360826e-06, "loss": 0.6058, "step": 6977, "task_loss": 0.8583329319953918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39716672897338867, "epoch": 5.9, "learning_rate": 7.867407317956769e-06, "loss": 0.5949, "step": 6978, "task_loss": 0.5925692319869995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.20717518031597137, "epoch": 5.9, "learning_rate": 7.861369399830938e-06, "loss": 0.4199, "step": 6979, "task_loss": 0.21700134873390198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3442147970199585, "epoch": 5.9, "learning_rate": 7.855331481705108e-06, "loss": 0.396, "step": 6980, "task_loss": 0.5193817019462585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39888426661491394, "epoch": 5.9, "learning_rate": 7.849293563579279e-06, "loss": 0.4422, "step": 6981, "task_loss": 0.5411069989204407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24404998123645782, "epoch": 5.9, "learning_rate": 7.843255645453448e-06, "loss": 0.3713, "step": 6982, "task_loss": 0.2543105185031891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29963618516921997, "epoch": 5.9, "learning_rate": 7.837217727327619e-06, "loss": 0.5896, "step": 6983, "task_loss": 0.38067957758903503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37129101157188416, "epoch": 5.9, "learning_rate": 7.831179809201787e-06, "loss": 0.3956, "step": 6984, "task_loss": 0.7263840436935425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4122962951660156, "epoch": 5.9, "learning_rate": 7.825141891075958e-06, "loss": 0.4524, "step": 6985, "task_loss": 0.2442743182182312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5730170011520386, "epoch": 5.9, "learning_rate": 7.819103972950127e-06, "loss": 0.3797, "step": 6986, "task_loss": 0.5021531581878662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.377128005027771, "epoch": 5.91, "learning_rate": 7.813066054824297e-06, "loss": 0.4749, "step": 6987, "task_loss": 0.20708145201206207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.401876837015152, "epoch": 5.91, "learning_rate": 7.807028136698468e-06, "loss": 0.4051, "step": 6988, "task_loss": 1.0245081186294556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3383440375328064, "epoch": 5.91, "learning_rate": 7.800990218572637e-06, "loss": 0.3659, "step": 6989, "task_loss": 0.5507072806358337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5728752613067627, "epoch": 5.91, "learning_rate": 7.794952300446806e-06, "loss": 0.4041, "step": 6990, "task_loss": 1.4205158948898315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3251244127750397, "epoch": 5.91, "learning_rate": 7.788914382320976e-06, "loss": 0.3733, "step": 6991, "task_loss": 0.8480383157730103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6370258331298828, "epoch": 5.91, "learning_rate": 7.782876464195147e-06, "loss": 0.4515, "step": 6992, "task_loss": 0.43635597825050354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4869873523712158, "epoch": 5.91, "learning_rate": 7.776838546069316e-06, "loss": 0.4822, "step": 6993, "task_loss": 0.5614396333694458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47066739201545715, "epoch": 5.91, "learning_rate": 7.770800627943485e-06, "loss": 0.4863, "step": 6994, "task_loss": 1.292590856552124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5118110179901123, "epoch": 5.91, "learning_rate": 7.764762709817655e-06, "loss": 0.5562, "step": 6995, "task_loss": 1.192577600479126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27334535121917725, "epoch": 5.91, "learning_rate": 7.758724791691826e-06, "loss": 0.3563, "step": 6996, "task_loss": 0.6750822067260742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3604450821876526, "epoch": 5.91, "learning_rate": 7.752686873565995e-06, "loss": 0.4196, "step": 6997, "task_loss": 0.9979363679885864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.485706627368927, "epoch": 5.91, "learning_rate": 7.746648955440165e-06, "loss": 0.5278, "step": 6998, "task_loss": 0.9236827492713928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5276333093643188, "epoch": 5.92, "learning_rate": 7.740611037314334e-06, "loss": 0.5335, "step": 6999, "task_loss": 0.499744176864624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3345971703529358, "epoch": 5.92, "learning_rate": 7.734573119188505e-06, "loss": 0.414, "step": 7000, "task_loss": 0.4801103174686432 }, { "epoch": 5.92, "eval_accuracy": 0.9103762376237624, "eval_loss": 0.2805521488189697, "eval_runtime": 227.887, "eval_samples_per_second": 110.801, "eval_steps_per_second": 0.869, "step": 7000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37046775221824646, "epoch": 5.92, "learning_rate": 7.728535201062674e-06, "loss": 0.3694, "step": 7001, "task_loss": 1.0790047645568848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43704119324684143, "epoch": 5.92, "learning_rate": 7.722497282936844e-06, "loss": 0.498, "step": 7002, "task_loss": 0.2983787953853607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.19424527883529663, "epoch": 5.92, "learning_rate": 7.716459364811015e-06, "loss": 0.3969, "step": 7003, "task_loss": 1.0791457891464233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.580791175365448, "epoch": 5.92, "learning_rate": 7.710421446685184e-06, "loss": 0.4919, "step": 7004, "task_loss": 0.42992478609085083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3083846867084503, "epoch": 5.92, "learning_rate": 7.704383528559352e-06, "loss": 0.4861, "step": 7005, "task_loss": 0.05863397940993309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.20193368196487427, "epoch": 5.92, "learning_rate": 7.698345610433523e-06, "loss": 0.4803, "step": 7006, "task_loss": 0.04005083814263344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5419428944587708, "epoch": 5.92, "learning_rate": 7.692307692307694e-06, "loss": 0.4819, "step": 7007, "task_loss": 0.8839797973632812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49520108103752136, "epoch": 5.92, "learning_rate": 7.686269774181862e-06, "loss": 0.5003, "step": 7008, "task_loss": 0.9040836691856384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24556726217269897, "epoch": 5.92, "learning_rate": 7.680231856056031e-06, "loss": 0.3732, "step": 7009, "task_loss": 0.4811991751194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7885105609893799, "epoch": 5.93, "learning_rate": 7.674193937930202e-06, "loss": 0.435, "step": 7010, "task_loss": 1.1441720724105835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32612931728363037, "epoch": 5.93, "learning_rate": 7.668156019804372e-06, "loss": 0.3565, "step": 7011, "task_loss": 0.26004287600517273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3905634880065918, "epoch": 5.93, "learning_rate": 7.662118101678541e-06, "loss": 0.4717, "step": 7012, "task_loss": 0.5330042243003845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44006723165512085, "epoch": 5.93, "learning_rate": 7.65608018355271e-06, "loss": 0.4301, "step": 7013, "task_loss": 0.8790510892868042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5632410049438477, "epoch": 5.93, "learning_rate": 7.65004226542688e-06, "loss": 0.4422, "step": 7014, "task_loss": 0.7673523426055908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4600031077861786, "epoch": 5.93, "learning_rate": 7.644004347301051e-06, "loss": 0.602, "step": 7015, "task_loss": 0.34858885407447815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3741466999053955, "epoch": 5.93, "learning_rate": 7.63796642917522e-06, "loss": 0.5048, "step": 7016, "task_loss": 0.9648093581199646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5596623420715332, "epoch": 5.93, "learning_rate": 7.63192851104939e-06, "loss": 0.4372, "step": 7017, "task_loss": 0.593818187713623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8328822255134583, "epoch": 5.93, "learning_rate": 7.62589059292356e-06, "loss": 0.6445, "step": 7018, "task_loss": 0.7616930603981018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21878403425216675, "epoch": 5.93, "learning_rate": 7.6198526747977294e-06, "loss": 0.3451, "step": 7019, "task_loss": 0.7419448494911194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3129991292953491, "epoch": 5.93, "learning_rate": 7.6138147566719e-06, "loss": 0.3389, "step": 7020, "task_loss": 0.3373315930366516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42461833357810974, "epoch": 5.93, "learning_rate": 7.60777683854607e-06, "loss": 0.3885, "step": 7021, "task_loss": 0.3851896822452545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3396170735359192, "epoch": 5.94, "learning_rate": 7.60173892042024e-06, "loss": 0.4242, "step": 7022, "task_loss": 0.6543120741844177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3324466943740845, "epoch": 5.94, "learning_rate": 7.595701002294408e-06, "loss": 0.4188, "step": 7023, "task_loss": 0.6630089282989502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37620246410369873, "epoch": 5.94, "learning_rate": 7.589663084168579e-06, "loss": 0.3767, "step": 7024, "task_loss": 0.5533791780471802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4594841003417969, "epoch": 5.94, "learning_rate": 7.583625166042749e-06, "loss": 0.4321, "step": 7025, "task_loss": 0.19519706070423126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3487852215766907, "epoch": 5.94, "learning_rate": 7.577587247916919e-06, "loss": 0.5114, "step": 7026, "task_loss": 0.11900435388088226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5019280910491943, "epoch": 5.94, "learning_rate": 7.571549329791089e-06, "loss": 0.3939, "step": 7027, "task_loss": 1.2152637243270874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4043170213699341, "epoch": 5.94, "learning_rate": 7.565511411665258e-06, "loss": 0.5566, "step": 7028, "task_loss": 0.7055994272232056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40806329250335693, "epoch": 5.94, "learning_rate": 7.5594734935394275e-06, "loss": 0.3817, "step": 7029, "task_loss": 0.844902753829956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.302229642868042, "epoch": 5.94, "learning_rate": 7.553435575413598e-06, "loss": 0.4131, "step": 7030, "task_loss": 0.5857746005058289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38478633761405945, "epoch": 5.94, "learning_rate": 7.547397657287768e-06, "loss": 0.5197, "step": 7031, "task_loss": 0.8376520276069641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5111440420150757, "epoch": 5.94, "learning_rate": 7.5413597391619375e-06, "loss": 0.4412, "step": 7032, "task_loss": 1.3450348377227783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39812418818473816, "epoch": 5.94, "learning_rate": 7.535321821036106e-06, "loss": 0.4569, "step": 7033, "task_loss": 0.999010443687439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3072189688682556, "epoch": 5.95, "learning_rate": 7.529283902910277e-06, "loss": 0.388, "step": 7034, "task_loss": 0.7049151659011841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4906582236289978, "epoch": 5.95, "learning_rate": 7.523245984784447e-06, "loss": 0.3986, "step": 7035, "task_loss": 0.19671744108200073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47010916471481323, "epoch": 5.95, "learning_rate": 7.5172080666586164e-06, "loss": 0.531, "step": 7036, "task_loss": 0.5344096422195435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4831275939941406, "epoch": 5.95, "learning_rate": 7.511170148532787e-06, "loss": 0.5177, "step": 7037, "task_loss": 0.6647403836250305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3792143166065216, "epoch": 5.95, "learning_rate": 7.505132230406955e-06, "loss": 0.3463, "step": 7038, "task_loss": 0.8794869780540466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27047115564346313, "epoch": 5.95, "learning_rate": 7.499094312281126e-06, "loss": 0.4287, "step": 7039, "task_loss": 1.5826219320297241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4534704089164734, "epoch": 5.95, "learning_rate": 7.493056394155295e-06, "loss": 0.4796, "step": 7040, "task_loss": 0.3871309757232666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6455046534538269, "epoch": 5.95, "learning_rate": 7.487018476029466e-06, "loss": 0.4326, "step": 7041, "task_loss": 0.22206416726112366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49456316232681274, "epoch": 5.95, "learning_rate": 7.480980557903636e-06, "loss": 0.4727, "step": 7042, "task_loss": 0.35555681586265564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5057169198989868, "epoch": 5.95, "learning_rate": 7.4749426397778045e-06, "loss": 0.4278, "step": 7043, "task_loss": 1.2687804698944092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3873075842857361, "epoch": 5.95, "learning_rate": 7.468904721651974e-06, "loss": 0.4967, "step": 7044, "task_loss": 0.7278705835342407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21768298745155334, "epoch": 5.95, "learning_rate": 7.462866803526145e-06, "loss": 0.3061, "step": 7045, "task_loss": 0.6389554142951965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27365902066230774, "epoch": 5.96, "learning_rate": 7.4568288854003145e-06, "loss": 0.3973, "step": 7046, "task_loss": 0.07726956903934479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5594854354858398, "epoch": 5.96, "learning_rate": 7.450790967274484e-06, "loss": 0.4927, "step": 7047, "task_loss": 0.8201348781585693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41983649134635925, "epoch": 5.96, "learning_rate": 7.444753049148653e-06, "loss": 0.3994, "step": 7048, "task_loss": 0.9367262721061707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4170165956020355, "epoch": 5.96, "learning_rate": 7.438715131022824e-06, "loss": 0.4594, "step": 7049, "task_loss": 0.6192636489868164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42109233140945435, "epoch": 5.96, "learning_rate": 7.432677212896993e-06, "loss": 0.3542, "step": 7050, "task_loss": 1.2949645519256592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5134333968162537, "epoch": 5.96, "learning_rate": 7.426639294771163e-06, "loss": 0.4059, "step": 7051, "task_loss": 0.2380795180797577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.507244884967804, "epoch": 5.96, "learning_rate": 7.420601376645334e-06, "loss": 0.4131, "step": 7052, "task_loss": 0.832607626914978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5876025557518005, "epoch": 5.96, "learning_rate": 7.414563458519503e-06, "loss": 0.4915, "step": 7053, "task_loss": 0.5305912494659424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44183656573295593, "epoch": 5.96, "learning_rate": 7.408525540393672e-06, "loss": 0.3622, "step": 7054, "task_loss": 1.790590524673462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4473643898963928, "epoch": 5.96, "learning_rate": 7.402487622267842e-06, "loss": 0.5364, "step": 7055, "task_loss": 0.7588744759559631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2649776339530945, "epoch": 5.96, "learning_rate": 7.396449704142013e-06, "loss": 0.439, "step": 7056, "task_loss": 0.284032940864563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29009222984313965, "epoch": 5.96, "learning_rate": 7.390411786016182e-06, "loss": 0.4439, "step": 7057, "task_loss": 0.7101868391036987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30446457862854004, "epoch": 5.97, "learning_rate": 7.384373867890351e-06, "loss": 0.412, "step": 7058, "task_loss": 0.5937415361404419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5244344472885132, "epoch": 5.97, "learning_rate": 7.378335949764521e-06, "loss": 0.4599, "step": 7059, "task_loss": 0.7775442004203796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24270044267177582, "epoch": 5.97, "learning_rate": 7.3722980316386915e-06, "loss": 0.3707, "step": 7060, "task_loss": 0.8214513063430786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.670659065246582, "epoch": 5.97, "learning_rate": 7.366260113512861e-06, "loss": 0.5485, "step": 7061, "task_loss": 1.0849199295043945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4346197247505188, "epoch": 5.97, "learning_rate": 7.360222195387032e-06, "loss": 0.6423, "step": 7062, "task_loss": 0.3534286320209503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2518426179885864, "epoch": 5.97, "learning_rate": 7.3541842772612e-06, "loss": 0.3991, "step": 7063, "task_loss": 0.11680903285741806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28752532601356506, "epoch": 5.97, "learning_rate": 7.34814635913537e-06, "loss": 0.3104, "step": 7064, "task_loss": 0.3754250407218933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4494609236717224, "epoch": 5.97, "learning_rate": 7.34210844100954e-06, "loss": 0.6208, "step": 7065, "task_loss": 0.899495005607605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34619009494781494, "epoch": 5.97, "learning_rate": 7.336070522883711e-06, "loss": 0.3983, "step": 7066, "task_loss": 0.23274581134319305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5593941807746887, "epoch": 5.97, "learning_rate": 7.33003260475788e-06, "loss": 0.4632, "step": 7067, "task_loss": 0.7184323072433472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5535835027694702, "epoch": 5.97, "learning_rate": 7.323994686632049e-06, "loss": 0.3368, "step": 7068, "task_loss": 0.7134178876876831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5950720906257629, "epoch": 5.97, "learning_rate": 7.317956768506219e-06, "loss": 0.4394, "step": 7069, "task_loss": 1.1516510248184204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3190397024154663, "epoch": 5.98, "learning_rate": 7.31191885038039e-06, "loss": 0.411, "step": 7070, "task_loss": 0.2600443363189697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5478578209877014, "epoch": 5.98, "learning_rate": 7.305880932254559e-06, "loss": 0.4204, "step": 7071, "task_loss": 1.0017175674438477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.334171324968338, "epoch": 5.98, "learning_rate": 7.299843014128729e-06, "loss": 0.3976, "step": 7072, "task_loss": 0.9251954555511475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3606780171394348, "epoch": 5.98, "learning_rate": 7.293805096002898e-06, "loss": 0.5004, "step": 7073, "task_loss": 0.49163466691970825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28308412432670593, "epoch": 5.98, "learning_rate": 7.2877671778770685e-06, "loss": 0.3652, "step": 7074, "task_loss": 0.28958550095558167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36416882276535034, "epoch": 5.98, "learning_rate": 7.281729259751238e-06, "loss": 0.4951, "step": 7075, "task_loss": 0.8822728991508484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4413394629955292, "epoch": 5.98, "learning_rate": 7.275691341625408e-06, "loss": 0.3357, "step": 7076, "task_loss": 0.5711826086044312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27178695797920227, "epoch": 5.98, "learning_rate": 7.2696534234995785e-06, "loss": 0.4049, "step": 7077, "task_loss": 0.4255422353744507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2500690817832947, "epoch": 5.98, "learning_rate": 7.263615505373747e-06, "loss": 0.3383, "step": 7078, "task_loss": 0.180959552526474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3031746745109558, "epoch": 5.98, "learning_rate": 7.257577587247917e-06, "loss": 0.3904, "step": 7079, "task_loss": 0.83329176902771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3383169174194336, "epoch": 5.98, "learning_rate": 7.251539669122087e-06, "loss": 0.489, "step": 7080, "task_loss": 0.6522268652915955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5693873167037964, "epoch": 5.99, "learning_rate": 7.245501750996257e-06, "loss": 0.3995, "step": 7081, "task_loss": 0.6012743711471558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39242082834243774, "epoch": 5.99, "learning_rate": 7.239463832870427e-06, "loss": 0.2985, "step": 7082, "task_loss": 0.6262942552566528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5861955881118774, "epoch": 5.99, "learning_rate": 7.233425914744596e-06, "loss": 0.4791, "step": 7083, "task_loss": 0.7192972302436829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6567158699035645, "epoch": 5.99, "learning_rate": 7.227387996618766e-06, "loss": 0.528, "step": 7084, "task_loss": 0.5775797367095947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22389966249465942, "epoch": 5.99, "learning_rate": 7.221350078492936e-06, "loss": 0.3791, "step": 7085, "task_loss": 0.9731560945510864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2128923386335373, "epoch": 5.99, "learning_rate": 7.215312160367106e-06, "loss": 0.3307, "step": 7086, "task_loss": 0.7405011653900146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6503626704216003, "epoch": 5.99, "learning_rate": 7.209274242241277e-06, "loss": 0.4312, "step": 7087, "task_loss": 0.7636539936065674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.11323842406272888, "epoch": 5.99, "learning_rate": 7.203236324115445e-06, "loss": 0.3105, "step": 7088, "task_loss": 0.2489795982837677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8654056787490845, "epoch": 5.99, "learning_rate": 7.197198405989615e-06, "loss": 0.6491, "step": 7089, "task_loss": 1.5682518482208252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35108596086502075, "epoch": 5.99, "learning_rate": 7.191160487863785e-06, "loss": 0.3238, "step": 7090, "task_loss": 0.4787486791610718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45431530475616455, "epoch": 5.99, "learning_rate": 7.1851225697379555e-06, "loss": 0.4341, "step": 7091, "task_loss": 0.7081970572471619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5746167898178101, "epoch": 5.99, "learning_rate": 7.1790846516121235e-06, "loss": 0.5528, "step": 7092, "task_loss": 1.1433279514312744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4582858383655548, "epoch": 6.0, "learning_rate": 7.173046733486294e-06, "loss": 0.4393, "step": 7093, "task_loss": 0.3183719217777252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.510151207447052, "epoch": 6.0, "learning_rate": 7.167008815360464e-06, "loss": 0.4725, "step": 7094, "task_loss": 0.8925796747207642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2868543863296509, "epoch": 6.0, "learning_rate": 7.160970897234634e-06, "loss": 0.4292, "step": 7095, "task_loss": 0.2884525656700134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28785791993141174, "epoch": 6.0, "learning_rate": 7.154932979108804e-06, "loss": 0.4467, "step": 7096, "task_loss": 0.7509374618530273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4452590346336365, "epoch": 6.0, "learning_rate": 7.148895060982973e-06, "loss": 0.511, "step": 7097, "task_loss": 0.7346837520599365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33730486035346985, "epoch": 6.0, "learning_rate": 7.142857142857143e-06, "loss": 0.517, "step": 7098, "task_loss": 0.965241014957428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5262080430984497, "epoch": 6.0, "learning_rate": 7.136819224731313e-06, "loss": 0.9397, "step": 7099, "task_loss": 0.3902360796928406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5188996195793152, "epoch": 6.0, "learning_rate": 7.130781306605483e-06, "loss": 0.4016, "step": 7100, "task_loss": 0.5250151753425598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36875835061073303, "epoch": 6.0, "learning_rate": 7.124743388479653e-06, "loss": 0.4149, "step": 7101, "task_loss": 0.4496521055698395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.251220703125, "epoch": 6.0, "learning_rate": 7.118705470353822e-06, "loss": 0.4304, "step": 7102, "task_loss": 0.43171605467796326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2986379861831665, "epoch": 6.0, "learning_rate": 7.112667552227991e-06, "loss": 0.433, "step": 7103, "task_loss": 0.47165364027023315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45028892159461975, "epoch": 6.01, "learning_rate": 7.106629634102162e-06, "loss": 0.5214, "step": 7104, "task_loss": 0.8099179267883301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38340699672698975, "epoch": 6.01, "learning_rate": 7.100591715976332e-06, "loss": 0.4698, "step": 7105, "task_loss": 0.3486477732658386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3130529522895813, "epoch": 6.01, "learning_rate": 7.094553797850502e-06, "loss": 0.3593, "step": 7106, "task_loss": 0.2623346745967865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7038084864616394, "epoch": 6.01, "learning_rate": 7.08851587972467e-06, "loss": 0.4758, "step": 7107, "task_loss": 0.6391435861587524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46086493134498596, "epoch": 6.01, "learning_rate": 7.082477961598841e-06, "loss": 0.4552, "step": 7108, "task_loss": 0.647635817527771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5871177911758423, "epoch": 6.01, "learning_rate": 7.0764400434730105e-06, "loss": 0.4943, "step": 7109, "task_loss": 1.8093488216400146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8817994594573975, "epoch": 6.01, "learning_rate": 7.070402125347181e-06, "loss": 0.5513, "step": 7110, "task_loss": 0.44257140159606934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6310312747955322, "epoch": 6.01, "learning_rate": 7.064364207221351e-06, "loss": 0.6963, "step": 7111, "task_loss": 0.5926650166511536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3316280245780945, "epoch": 6.01, "learning_rate": 7.05832628909552e-06, "loss": 0.3539, "step": 7112, "task_loss": 0.21706748008728027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5676208734512329, "epoch": 6.01, "learning_rate": 7.0522883709696894e-06, "loss": 0.4231, "step": 7113, "task_loss": 1.0569360256195068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4290180504322052, "epoch": 6.01, "learning_rate": 7.04625045284386e-06, "loss": 0.477, "step": 7114, "task_loss": 0.3144263029098511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.15094241499900818, "epoch": 6.01, "learning_rate": 7.04021253471803e-06, "loss": 0.2928, "step": 7115, "task_loss": 0.0428164042532444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6265444755554199, "epoch": 6.02, "learning_rate": 7.0341746165921994e-06, "loss": 0.5195, "step": 7116, "task_loss": 0.41608041524887085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34241437911987305, "epoch": 6.02, "learning_rate": 7.028136698466368e-06, "loss": 0.339, "step": 7117, "task_loss": 0.7177985310554504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4421979486942291, "epoch": 6.02, "learning_rate": 7.022098780340539e-06, "loss": 0.3941, "step": 7118, "task_loss": 0.18723012506961823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32216769456863403, "epoch": 6.02, "learning_rate": 7.016060862214709e-06, "loss": 0.3952, "step": 7119, "task_loss": 0.47555041313171387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3246784210205078, "epoch": 6.02, "learning_rate": 7.010022944088878e-06, "loss": 0.4045, "step": 7120, "task_loss": 0.6870850324630737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7007710337638855, "epoch": 6.02, "learning_rate": 7.003985025963049e-06, "loss": 0.4158, "step": 7121, "task_loss": 0.7366905808448792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5591539740562439, "epoch": 6.02, "learning_rate": 6.997947107837218e-06, "loss": 0.3927, "step": 7122, "task_loss": 1.010343074798584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.434347927570343, "epoch": 6.02, "learning_rate": 6.9919091897113875e-06, "loss": 0.4522, "step": 7123, "task_loss": 0.6980617642402649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32159489393234253, "epoch": 6.02, "learning_rate": 6.985871271585557e-06, "loss": 0.4841, "step": 7124, "task_loss": 0.5612009763717651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.376287579536438, "epoch": 6.02, "learning_rate": 6.979833353459728e-06, "loss": 0.4228, "step": 7125, "task_loss": 0.8765881657600403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3521145284175873, "epoch": 6.02, "learning_rate": 6.9737954353338975e-06, "loss": 0.3515, "step": 7126, "task_loss": 1.162037968635559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3611052334308624, "epoch": 6.02, "learning_rate": 6.967757517208066e-06, "loss": 0.3082, "step": 7127, "task_loss": 0.404824823141098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6239032745361328, "epoch": 6.03, "learning_rate": 6.961719599082236e-06, "loss": 0.5001, "step": 7128, "task_loss": 0.5826499462127686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33717507123947144, "epoch": 6.03, "learning_rate": 6.955681680956407e-06, "loss": 0.4948, "step": 7129, "task_loss": 0.45620113611221313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.223327174782753, "epoch": 6.03, "learning_rate": 6.9496437628305764e-06, "loss": 0.4137, "step": 7130, "task_loss": 0.4199458658695221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2688449025154114, "epoch": 6.03, "learning_rate": 6.943605844704747e-06, "loss": 0.5287, "step": 7131, "task_loss": 0.1741996556520462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3611210286617279, "epoch": 6.03, "learning_rate": 6.937567926578915e-06, "loss": 0.2833, "step": 7132, "task_loss": 0.4573085904121399 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.18000757694244385, "epoch": 6.03, "learning_rate": 6.931530008453086e-06, "loss": 0.4284, "step": 7133, "task_loss": 0.4397300183773041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3811725974082947, "epoch": 6.03, "learning_rate": 6.925492090327255e-06, "loss": 0.4054, "step": 7134, "task_loss": 0.34527382254600525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37422382831573486, "epoch": 6.03, "learning_rate": 6.919454172201426e-06, "loss": 0.4071, "step": 7135, "task_loss": 0.5344712734222412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48744499683380127, "epoch": 6.03, "learning_rate": 6.913416254075596e-06, "loss": 0.3935, "step": 7136, "task_loss": 0.44829416275024414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4132014811038971, "epoch": 6.03, "learning_rate": 6.9073783359497645e-06, "loss": 0.4354, "step": 7137, "task_loss": 0.9007019996643066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5106713175773621, "epoch": 6.03, "learning_rate": 6.901340417823934e-06, "loss": 0.4397, "step": 7138, "task_loss": 0.5553090572357178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3593030273914337, "epoch": 6.03, "learning_rate": 6.895302499698105e-06, "loss": 0.3602, "step": 7139, "task_loss": 0.6285237073898315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4235730767250061, "epoch": 6.04, "learning_rate": 6.8892645815722745e-06, "loss": 0.4447, "step": 7140, "task_loss": 0.4478953182697296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30977529287338257, "epoch": 6.04, "learning_rate": 6.883226663446444e-06, "loss": 0.4706, "step": 7141, "task_loss": 0.2656865119934082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3857533633708954, "epoch": 6.04, "learning_rate": 6.877188745320613e-06, "loss": 0.5095, "step": 7142, "task_loss": 0.420479953289032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4781047999858856, "epoch": 6.04, "learning_rate": 6.871150827194784e-06, "loss": 0.3935, "step": 7143, "task_loss": 0.5897602438926697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48093336820602417, "epoch": 6.04, "learning_rate": 6.865112909068953e-06, "loss": 0.4192, "step": 7144, "task_loss": 0.6875292062759399 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5217198133468628, "epoch": 6.04, "learning_rate": 6.859074990943123e-06, "loss": 0.4914, "step": 7145, "task_loss": 1.0731172561645508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49561771750450134, "epoch": 6.04, "learning_rate": 6.853037072817294e-06, "loss": 0.5129, "step": 7146, "task_loss": 0.7291666865348816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4369674324989319, "epoch": 6.04, "learning_rate": 6.846999154691463e-06, "loss": 0.4054, "step": 7147, "task_loss": 0.9252296090126038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29612022638320923, "epoch": 6.04, "learning_rate": 6.840961236565632e-06, "loss": 0.3802, "step": 7148, "task_loss": 0.43354177474975586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4211171269416809, "epoch": 6.04, "learning_rate": 6.834923318439802e-06, "loss": 0.4061, "step": 7149, "task_loss": 0.8335044980049133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35648655891418457, "epoch": 6.04, "learning_rate": 6.828885400313973e-06, "loss": 0.4281, "step": 7150, "task_loss": 0.5089904069900513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3265579640865326, "epoch": 6.04, "learning_rate": 6.822847482188142e-06, "loss": 0.3215, "step": 7151, "task_loss": 0.3704642057418823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5553097128868103, "epoch": 6.05, "learning_rate": 6.816809564062311e-06, "loss": 0.4735, "step": 7152, "task_loss": 1.075695514678955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2504265308380127, "epoch": 6.05, "learning_rate": 6.810771645936481e-06, "loss": 0.3984, "step": 7153, "task_loss": 0.03829047828912735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29096919298171997, "epoch": 6.05, "learning_rate": 6.8047337278106515e-06, "loss": 0.4162, "step": 7154, "task_loss": 0.144149512052536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.734687328338623, "epoch": 6.05, "learning_rate": 6.798695809684821e-06, "loss": 0.5563, "step": 7155, "task_loss": 0.2257729321718216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6400864720344543, "epoch": 6.05, "learning_rate": 6.792657891558992e-06, "loss": 0.6251, "step": 7156, "task_loss": 0.8936904668807983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3554542660713196, "epoch": 6.05, "learning_rate": 6.78661997343316e-06, "loss": 0.4187, "step": 7157, "task_loss": 0.327172189950943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29771751165390015, "epoch": 6.05, "learning_rate": 6.78058205530733e-06, "loss": 0.3586, "step": 7158, "task_loss": 0.35997769236564636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4271741509437561, "epoch": 6.05, "learning_rate": 6.7745441371815e-06, "loss": 0.4096, "step": 7159, "task_loss": 0.46357718110084534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26467353105545044, "epoch": 6.05, "learning_rate": 6.768506219055671e-06, "loss": 0.4243, "step": 7160, "task_loss": 0.17903000116348267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35673514008522034, "epoch": 6.05, "learning_rate": 6.76246830092984e-06, "loss": 0.4081, "step": 7161, "task_loss": 0.9858098030090332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41611674427986145, "epoch": 6.05, "learning_rate": 6.756430382804009e-06, "loss": 0.4238, "step": 7162, "task_loss": 0.32074862718582153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.371525377035141, "epoch": 6.05, "learning_rate": 6.750392464678179e-06, "loss": 0.3966, "step": 7163, "task_loss": 0.5520741939544678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2822134494781494, "epoch": 6.06, "learning_rate": 6.744354546552349e-06, "loss": 0.4649, "step": 7164, "task_loss": 0.5250979065895081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26244956254959106, "epoch": 6.06, "learning_rate": 6.738316628426519e-06, "loss": 0.4808, "step": 7165, "task_loss": 0.7779847383499146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2613524794578552, "epoch": 6.06, "learning_rate": 6.732278710300689e-06, "loss": 0.3055, "step": 7166, "task_loss": 0.06769565492868423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3707756996154785, "epoch": 6.06, "learning_rate": 6.726240792174858e-06, "loss": 0.5357, "step": 7167, "task_loss": 0.07357662171125412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.353291392326355, "epoch": 6.06, "learning_rate": 6.720202874049028e-06, "loss": 0.4597, "step": 7168, "task_loss": 0.1118333637714386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2877667546272278, "epoch": 6.06, "learning_rate": 6.714164955923198e-06, "loss": 0.4445, "step": 7169, "task_loss": 0.35710740089416504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35396718978881836, "epoch": 6.06, "learning_rate": 6.708127037797368e-06, "loss": 0.3683, "step": 7170, "task_loss": 0.2976921498775482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4750630855560303, "epoch": 6.06, "learning_rate": 6.7020891196715385e-06, "loss": 0.4242, "step": 7171, "task_loss": 0.726797878742218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37916404008865356, "epoch": 6.06, "learning_rate": 6.6960512015457065e-06, "loss": 0.4347, "step": 7172, "task_loss": 0.2923533022403717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3319649398326874, "epoch": 6.06, "learning_rate": 6.690013283419877e-06, "loss": 0.5594, "step": 7173, "task_loss": 1.6473699808120728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36475253105163574, "epoch": 6.06, "learning_rate": 6.683975365294047e-06, "loss": 0.6092, "step": 7174, "task_loss": 0.8801401853561401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6032578349113464, "epoch": 6.07, "learning_rate": 6.677937447168217e-06, "loss": 0.5348, "step": 7175, "task_loss": 0.7852023839950562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6037065982818604, "epoch": 6.07, "learning_rate": 6.671899529042387e-06, "loss": 0.4547, "step": 7176, "task_loss": 0.3197253942489624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27962881326675415, "epoch": 6.07, "learning_rate": 6.665861610916556e-06, "loss": 0.4451, "step": 7177, "task_loss": 0.07280945032835007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4126201868057251, "epoch": 6.07, "learning_rate": 6.659823692790726e-06, "loss": 0.4222, "step": 7178, "task_loss": 0.5707032680511475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.253854900598526, "epoch": 6.07, "learning_rate": 6.653785774664896e-06, "loss": 0.3142, "step": 7179, "task_loss": 0.8102204203605652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4161665439605713, "epoch": 6.07, "learning_rate": 6.647747856539066e-06, "loss": 0.4745, "step": 7180, "task_loss": 1.3409544229507446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5477035045623779, "epoch": 6.07, "learning_rate": 6.641709938413235e-06, "loss": 0.4311, "step": 7181, "task_loss": 0.7404505014419556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5668452382087708, "epoch": 6.07, "learning_rate": 6.635672020287405e-06, "loss": 0.4508, "step": 7182, "task_loss": 0.8325549364089966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22278167307376862, "epoch": 6.07, "learning_rate": 6.629634102161575e-06, "loss": 0.3544, "step": 7183, "task_loss": 0.08417687565088272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41919636726379395, "epoch": 6.07, "learning_rate": 6.623596184035745e-06, "loss": 0.5219, "step": 7184, "task_loss": 0.24531899392604828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3704850375652313, "epoch": 6.07, "learning_rate": 6.617558265909915e-06, "loss": 0.4751, "step": 7185, "task_loss": 0.6573542356491089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4520191252231598, "epoch": 6.07, "learning_rate": 6.6115203477840835e-06, "loss": 0.3109, "step": 7186, "task_loss": 0.5783968567848206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21963025629520416, "epoch": 6.08, "learning_rate": 6.605482429658254e-06, "loss": 0.4426, "step": 7187, "task_loss": 0.29841774702072144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6478004455566406, "epoch": 6.08, "learning_rate": 6.599444511532424e-06, "loss": 0.5063, "step": 7188, "task_loss": 1.5139487981796265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45501431822776794, "epoch": 6.08, "learning_rate": 6.5934065934065935e-06, "loss": 0.4693, "step": 7189, "task_loss": 1.0436865091323853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6331683397293091, "epoch": 6.08, "learning_rate": 6.587368675280764e-06, "loss": 0.4353, "step": 7190, "task_loss": 1.1692107915878296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35024380683898926, "epoch": 6.08, "learning_rate": 6.581330757154933e-06, "loss": 0.3994, "step": 7191, "task_loss": 0.4679393768310547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5039750337600708, "epoch": 6.08, "learning_rate": 6.575292839029103e-06, "loss": 0.4574, "step": 7192, "task_loss": 0.6149628758430481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5373827815055847, "epoch": 6.08, "learning_rate": 6.5692549209032724e-06, "loss": 0.4481, "step": 7193, "task_loss": 1.1550272703170776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2818429470062256, "epoch": 6.08, "learning_rate": 6.563217002777443e-06, "loss": 0.4204, "step": 7194, "task_loss": 0.3789758086204529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33568838238716125, "epoch": 6.08, "learning_rate": 6.557179084651613e-06, "loss": 0.3075, "step": 7195, "task_loss": 0.23927980661392212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4208991825580597, "epoch": 6.08, "learning_rate": 6.551141166525782e-06, "loss": 0.4062, "step": 7196, "task_loss": 1.020880937576294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3647145926952362, "epoch": 6.08, "learning_rate": 6.545103248399951e-06, "loss": 0.3981, "step": 7197, "task_loss": 0.5236802697181702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2462729662656784, "epoch": 6.08, "learning_rate": 6.539065330274122e-06, "loss": 0.3264, "step": 7198, "task_loss": 0.44931530952453613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4583521783351898, "epoch": 6.09, "learning_rate": 6.533027412148292e-06, "loss": 0.4561, "step": 7199, "task_loss": 0.8439003825187683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.15079770982265472, "epoch": 6.09, "learning_rate": 6.526989494022462e-06, "loss": 0.3586, "step": 7200, "task_loss": 0.6448714733123779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30904316902160645, "epoch": 6.09, "learning_rate": 6.52095157589663e-06, "loss": 0.5907, "step": 7201, "task_loss": 0.2571970224380493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3346657156944275, "epoch": 6.09, "learning_rate": 6.514913657770801e-06, "loss": 0.3241, "step": 7202, "task_loss": 0.1623639166355133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36350733041763306, "epoch": 6.09, "learning_rate": 6.5088757396449705e-06, "loss": 0.3813, "step": 7203, "task_loss": 0.7544834017753601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2563495337963104, "epoch": 6.09, "learning_rate": 6.502837821519141e-06, "loss": 0.396, "step": 7204, "task_loss": 0.7555733323097229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39668601751327515, "epoch": 6.09, "learning_rate": 6.496799903393311e-06, "loss": 0.5621, "step": 7205, "task_loss": 0.28675737977027893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4389287829399109, "epoch": 6.09, "learning_rate": 6.49076198526748e-06, "loss": 0.4949, "step": 7206, "task_loss": 1.0579266548156738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.367116779088974, "epoch": 6.09, "learning_rate": 6.484724067141649e-06, "loss": 0.3827, "step": 7207, "task_loss": 0.10323720425367355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.19079753756523132, "epoch": 6.09, "learning_rate": 6.47868614901582e-06, "loss": 0.3817, "step": 7208, "task_loss": 0.8882685303688049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4799621105194092, "epoch": 6.09, "learning_rate": 6.47264823088999e-06, "loss": 0.3971, "step": 7209, "task_loss": 0.3712241053581238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2461603730916977, "epoch": 6.09, "learning_rate": 6.4666103127641594e-06, "loss": 0.3244, "step": 7210, "task_loss": 0.5583053827285767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5292807817459106, "epoch": 6.1, "learning_rate": 6.460572394638328e-06, "loss": 0.4817, "step": 7211, "task_loss": 1.0666821002960205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.341541588306427, "epoch": 6.1, "learning_rate": 6.454534476512499e-06, "loss": 0.4323, "step": 7212, "task_loss": 0.02787797898054123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38108378648757935, "epoch": 6.1, "learning_rate": 6.448496558386669e-06, "loss": 0.3863, "step": 7213, "task_loss": 0.33153119683265686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37953752279281616, "epoch": 6.1, "learning_rate": 6.442458640260838e-06, "loss": 0.4349, "step": 7214, "task_loss": 0.2721627950668335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30654704570770264, "epoch": 6.1, "learning_rate": 6.436420722135009e-06, "loss": 0.3059, "step": 7215, "task_loss": 0.2704087793827057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5520497560501099, "epoch": 6.1, "learning_rate": 6.430382804009178e-06, "loss": 0.4473, "step": 7216, "task_loss": 0.7592300176620483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5831268429756165, "epoch": 6.1, "learning_rate": 6.4243448858833475e-06, "loss": 0.4839, "step": 7217, "task_loss": 0.8331745862960815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7392500042915344, "epoch": 6.1, "learning_rate": 6.418306967757517e-06, "loss": 0.4454, "step": 7218, "task_loss": 0.5427871942520142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30849045515060425, "epoch": 6.1, "learning_rate": 6.412269049631688e-06, "loss": 0.4129, "step": 7219, "task_loss": 1.013205647468567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43472176790237427, "epoch": 6.1, "learning_rate": 6.4062311315058575e-06, "loss": 0.429, "step": 7220, "task_loss": 0.9005637168884277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4241586923599243, "epoch": 6.1, "learning_rate": 6.400193213380026e-06, "loss": 0.3496, "step": 7221, "task_loss": 0.39624297618865967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.19200864434242249, "epoch": 6.1, "learning_rate": 6.394155295254196e-06, "loss": 0.3586, "step": 7222, "task_loss": 0.028283827006816864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3397844731807709, "epoch": 6.11, "learning_rate": 6.388117377128367e-06, "loss": 0.5111, "step": 7223, "task_loss": 0.6029148101806641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34376752376556396, "epoch": 6.11, "learning_rate": 6.382079459002536e-06, "loss": 0.4528, "step": 7224, "task_loss": 0.931618332862854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33462175726890564, "epoch": 6.11, "learning_rate": 6.376041540876707e-06, "loss": 0.3314, "step": 7225, "task_loss": 0.958778440952301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32678845524787903, "epoch": 6.11, "learning_rate": 6.370003622750875e-06, "loss": 0.4355, "step": 7226, "task_loss": 0.13563141226768494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.584662675857544, "epoch": 6.11, "learning_rate": 6.363965704625046e-06, "loss": 0.4765, "step": 7227, "task_loss": 0.44774317741394043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4314029812812805, "epoch": 6.11, "learning_rate": 6.357927786499215e-06, "loss": 0.4261, "step": 7228, "task_loss": 0.5963894724845886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7416990399360657, "epoch": 6.11, "learning_rate": 6.351889868373385e-06, "loss": 0.472, "step": 7229, "task_loss": 0.5903911590576172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4081224203109741, "epoch": 6.11, "learning_rate": 6.345851950247556e-06, "loss": 0.5169, "step": 7230, "task_loss": 0.15100185573101044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5304721593856812, "epoch": 6.11, "learning_rate": 6.3398140321217245e-06, "loss": 0.4036, "step": 7231, "task_loss": 0.48152437806129456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.339046835899353, "epoch": 6.11, "learning_rate": 6.333776113995894e-06, "loss": 0.3833, "step": 7232, "task_loss": 0.14621078968048096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5772907137870789, "epoch": 6.11, "learning_rate": 6.327738195870064e-06, "loss": 0.4169, "step": 7233, "task_loss": 0.6762048602104187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3395792543888092, "epoch": 6.11, "learning_rate": 6.3217002777442345e-06, "loss": 0.4118, "step": 7234, "task_loss": 0.41521400213241577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4958908259868622, "epoch": 6.12, "learning_rate": 6.315662359618404e-06, "loss": 0.5398, "step": 7235, "task_loss": 0.21414095163345337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4538428783416748, "epoch": 6.12, "learning_rate": 6.309624441492573e-06, "loss": 0.342, "step": 7236, "task_loss": 0.7001739740371704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41967061161994934, "epoch": 6.12, "learning_rate": 6.303586523366743e-06, "loss": 0.4931, "step": 7237, "task_loss": 0.8529934883117676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23234857618808746, "epoch": 6.12, "learning_rate": 6.297548605240913e-06, "loss": 0.2655, "step": 7238, "task_loss": 0.148982971906662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47618067264556885, "epoch": 6.12, "learning_rate": 6.291510687115083e-06, "loss": 0.4274, "step": 7239, "task_loss": 1.2428069114685059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5605955123901367, "epoch": 6.12, "learning_rate": 6.285472768989254e-06, "loss": 0.4915, "step": 7240, "task_loss": 0.3070848882198334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32097023725509644, "epoch": 6.12, "learning_rate": 6.279434850863422e-06, "loss": 0.3544, "step": 7241, "task_loss": 0.7405945062637329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26526740193367004, "epoch": 6.12, "learning_rate": 6.273396932737592e-06, "loss": 0.4234, "step": 7242, "task_loss": 0.49552497267723083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28193455934524536, "epoch": 6.12, "learning_rate": 6.267359014611762e-06, "loss": 0.3365, "step": 7243, "task_loss": 0.5540476441383362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5849704742431641, "epoch": 6.12, "learning_rate": 6.261321096485933e-06, "loss": 0.4917, "step": 7244, "task_loss": 0.6016995310783386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6353238821029663, "epoch": 6.12, "learning_rate": 6.255283178360102e-06, "loss": 0.5695, "step": 7245, "task_loss": 0.5593791604042053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48674285411834717, "epoch": 6.13, "learning_rate": 6.249245260234272e-06, "loss": 0.437, "step": 7246, "task_loss": 0.8704448938369751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3887278437614441, "epoch": 6.13, "learning_rate": 6.243207342108441e-06, "loss": 0.3945, "step": 7247, "task_loss": 1.036819577217102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7221405506134033, "epoch": 6.13, "learning_rate": 6.2371694239826115e-06, "loss": 0.4011, "step": 7248, "task_loss": 0.8440427780151367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2905399799346924, "epoch": 6.13, "learning_rate": 6.23113150585678e-06, "loss": 0.293, "step": 7249, "task_loss": 0.39458930492401123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44837361574172974, "epoch": 6.13, "learning_rate": 6.225093587730951e-06, "loss": 0.3776, "step": 7250, "task_loss": 0.8367217779159546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3761056661605835, "epoch": 6.13, "learning_rate": 6.219055669605121e-06, "loss": 0.3713, "step": 7251, "task_loss": 0.9336838126182556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5167402625083923, "epoch": 6.13, "learning_rate": 6.21301775147929e-06, "loss": 0.5295, "step": 7252, "task_loss": 0.24397388100624084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37550050020217896, "epoch": 6.13, "learning_rate": 6.20697983335346e-06, "loss": 0.5021, "step": 7253, "task_loss": 0.22421199083328247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30869266390800476, "epoch": 6.13, "learning_rate": 6.20094191522763e-06, "loss": 0.5231, "step": 7254, "task_loss": 0.8793156147003174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21807971596717834, "epoch": 6.13, "learning_rate": 6.1949039971017996e-06, "loss": 0.3784, "step": 7255, "task_loss": 0.5410561561584473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27783331274986267, "epoch": 6.13, "learning_rate": 6.188866078975969e-06, "loss": 0.3802, "step": 7256, "task_loss": 0.4019317328929901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.655258059501648, "epoch": 6.13, "learning_rate": 6.182828160850139e-06, "loss": 0.356, "step": 7257, "task_loss": 0.8426075577735901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30961620807647705, "epoch": 6.14, "learning_rate": 6.176790242724309e-06, "loss": 0.5551, "step": 7258, "task_loss": 0.8421933650970459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24080996215343475, "epoch": 6.14, "learning_rate": 6.1707523245984785e-06, "loss": 0.3145, "step": 7259, "task_loss": 0.3258103132247925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6343517303466797, "epoch": 6.14, "learning_rate": 6.164714406472648e-06, "loss": 0.6194, "step": 7260, "task_loss": 0.9431158900260925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43458953499794006, "epoch": 6.14, "learning_rate": 6.158676488346819e-06, "loss": 0.3747, "step": 7261, "task_loss": 0.0729730948805809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22360976040363312, "epoch": 6.14, "learning_rate": 6.152638570220988e-06, "loss": 0.3519, "step": 7262, "task_loss": 0.018381869420409203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30725303292274475, "epoch": 6.14, "learning_rate": 6.146600652095158e-06, "loss": 0.4896, "step": 7263, "task_loss": 0.7540176510810852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4743155539035797, "epoch": 6.14, "learning_rate": 6.140562733969327e-06, "loss": 0.5252, "step": 7264, "task_loss": 2.6361896991729736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40360334515571594, "epoch": 6.14, "learning_rate": 6.134524815843498e-06, "loss": 0.5201, "step": 7265, "task_loss": 1.1085761785507202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7494778633117676, "epoch": 6.14, "learning_rate": 6.128486897717667e-06, "loss": 0.7189, "step": 7266, "task_loss": 0.6918783783912659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34853583574295044, "epoch": 6.14, "learning_rate": 6.122448979591837e-06, "loss": 0.3056, "step": 7267, "task_loss": 1.0667970180511475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33720457553863525, "epoch": 6.14, "learning_rate": 6.116411061466007e-06, "loss": 0.3559, "step": 7268, "task_loss": 0.8019275069236755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38479551672935486, "epoch": 6.14, "learning_rate": 6.1103731433401765e-06, "loss": 0.4182, "step": 7269, "task_loss": 0.2925267219543457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46497076749801636, "epoch": 6.15, "learning_rate": 6.104335225214346e-06, "loss": 0.5108, "step": 7270, "task_loss": 0.0890534445643425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7163013219833374, "epoch": 6.15, "learning_rate": 6.098297307088517e-06, "loss": 0.5152, "step": 7271, "task_loss": 0.5404446721076965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24514922499656677, "epoch": 6.15, "learning_rate": 6.092259388962686e-06, "loss": 0.304, "step": 7272, "task_loss": 0.1903451383113861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6874876022338867, "epoch": 6.15, "learning_rate": 6.086221470836856e-06, "loss": 0.4389, "step": 7273, "task_loss": 0.46717455983161926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36466658115386963, "epoch": 6.15, "learning_rate": 6.080183552711025e-06, "loss": 0.4183, "step": 7274, "task_loss": 0.7481908202171326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38540419936180115, "epoch": 6.15, "learning_rate": 6.074145634585196e-06, "loss": 0.507, "step": 7275, "task_loss": 0.8377128839492798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.607723593711853, "epoch": 6.15, "learning_rate": 6.0681077164593655e-06, "loss": 0.5996, "step": 7276, "task_loss": 1.2768973112106323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4994179606437683, "epoch": 6.15, "learning_rate": 6.062069798333535e-06, "loss": 0.447, "step": 7277, "task_loss": 0.4322272539138794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40883272886276245, "epoch": 6.15, "learning_rate": 6.056031880207705e-06, "loss": 0.3761, "step": 7278, "task_loss": 0.07739400863647461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39878591895103455, "epoch": 6.15, "learning_rate": 6.049993962081875e-06, "loss": 0.4188, "step": 7279, "task_loss": 0.5073037147521973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3509463667869568, "epoch": 6.15, "learning_rate": 6.043956043956044e-06, "loss": 0.3611, "step": 7280, "task_loss": 0.7188200354576111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.436120867729187, "epoch": 6.15, "learning_rate": 6.037918125830214e-06, "loss": 0.441, "step": 7281, "task_loss": 0.7510096430778503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49622464179992676, "epoch": 6.16, "learning_rate": 6.031880207704384e-06, "loss": 0.4155, "step": 7282, "task_loss": 0.43904760479927063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39170512557029724, "epoch": 6.16, "learning_rate": 6.0258422895785535e-06, "loss": 0.5618, "step": 7283, "task_loss": 0.6548123955726624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3486229181289673, "epoch": 6.16, "learning_rate": 6.019804371452723e-06, "loss": 0.3256, "step": 7284, "task_loss": 0.7142788767814636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2012559324502945, "epoch": 6.16, "learning_rate": 6.013766453326893e-06, "loss": 0.389, "step": 7285, "task_loss": 0.2915891706943512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.316392183303833, "epoch": 6.16, "learning_rate": 6.0077285352010635e-06, "loss": 0.4925, "step": 7286, "task_loss": 0.4840840697288513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3201707899570465, "epoch": 6.16, "learning_rate": 6.0016906170752324e-06, "loss": 0.4569, "step": 7287, "task_loss": 0.10829141736030579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4134579002857208, "epoch": 6.16, "learning_rate": 5.995652698949403e-06, "loss": 0.4123, "step": 7288, "task_loss": 0.3835233449935913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4540594518184662, "epoch": 6.16, "learning_rate": 5.989614780823572e-06, "loss": 0.5934, "step": 7289, "task_loss": 1.351678729057312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.385326623916626, "epoch": 6.16, "learning_rate": 5.9835768626977424e-06, "loss": 0.3144, "step": 7290, "task_loss": 0.5265473127365112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45922553539276123, "epoch": 6.16, "learning_rate": 5.977538944571911e-06, "loss": 0.4517, "step": 7291, "task_loss": 0.29739147424697876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3387126922607422, "epoch": 6.16, "learning_rate": 5.971501026446082e-06, "loss": 0.4115, "step": 7292, "task_loss": 0.6748889088630676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6860440373420715, "epoch": 6.16, "learning_rate": 5.965463108320252e-06, "loss": 0.4421, "step": 7293, "task_loss": 0.7484753727912903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23819462954998016, "epoch": 6.17, "learning_rate": 5.959425190194421e-06, "loss": 0.3806, "step": 7294, "task_loss": 0.542414665222168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2464994490146637, "epoch": 6.17, "learning_rate": 5.953387272068591e-06, "loss": 0.371, "step": 7295, "task_loss": 0.10484969615936279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5479385852813721, "epoch": 6.17, "learning_rate": 5.947349353942761e-06, "loss": 0.6044, "step": 7296, "task_loss": 0.6484646201133728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7702033519744873, "epoch": 6.17, "learning_rate": 5.9413114358169305e-06, "loss": 0.5421, "step": 7297, "task_loss": 0.3023211658000946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5532692670822144, "epoch": 6.17, "learning_rate": 5.9352735176911e-06, "loss": 0.3602, "step": 7298, "task_loss": 0.6200825572013855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32802143692970276, "epoch": 6.17, "learning_rate": 5.92923559956527e-06, "loss": 0.3859, "step": 7299, "task_loss": 0.6853464245796204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.19541683793067932, "epoch": 6.17, "learning_rate": 5.92319768143944e-06, "loss": 0.3515, "step": 7300, "task_loss": 0.5572487115859985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49718689918518066, "epoch": 6.17, "learning_rate": 5.917159763313609e-06, "loss": 0.3829, "step": 7301, "task_loss": 0.9976769089698792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.20859295129776, "epoch": 6.17, "learning_rate": 5.911121845187779e-06, "loss": 0.3031, "step": 7302, "task_loss": 0.20879845321178436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3193260729312897, "epoch": 6.17, "learning_rate": 5.90508392706195e-06, "loss": 0.5351, "step": 7303, "task_loss": 0.32081133127212524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34988072514533997, "epoch": 6.17, "learning_rate": 5.899046008936119e-06, "loss": 0.3856, "step": 7304, "task_loss": 0.47435665130615234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5564247965812683, "epoch": 6.17, "learning_rate": 5.893008090810289e-06, "loss": 0.5241, "step": 7305, "task_loss": 1.6786940097808838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.17770510911941528, "epoch": 6.18, "learning_rate": 5.886970172684458e-06, "loss": 0.3584, "step": 7306, "task_loss": 0.0852338969707489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5578165054321289, "epoch": 6.18, "learning_rate": 5.880932254558629e-06, "loss": 0.4564, "step": 7307, "task_loss": 0.4016437232494354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6362099647521973, "epoch": 6.18, "learning_rate": 5.874894336432798e-06, "loss": 0.4208, "step": 7308, "task_loss": 1.5623605251312256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5555299520492554, "epoch": 6.18, "learning_rate": 5.868856418306968e-06, "loss": 0.482, "step": 7309, "task_loss": 1.108620285987854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24304023385047913, "epoch": 6.18, "learning_rate": 5.862818500181138e-06, "loss": 0.3736, "step": 7310, "task_loss": 0.15987583994865417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6921083927154541, "epoch": 6.18, "learning_rate": 5.8567805820553075e-06, "loss": 0.6145, "step": 7311, "task_loss": 0.6368083953857422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3926209807395935, "epoch": 6.18, "learning_rate": 5.850742663929477e-06, "loss": 0.463, "step": 7312, "task_loss": 0.3289099633693695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3738211691379547, "epoch": 6.18, "learning_rate": 5.844704745803648e-06, "loss": 0.4137, "step": 7313, "task_loss": 0.34171897172927856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3224182426929474, "epoch": 6.18, "learning_rate": 5.838666827677817e-06, "loss": 0.4432, "step": 7314, "task_loss": 0.2815491259098053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3088608384132385, "epoch": 6.18, "learning_rate": 5.832628909551987e-06, "loss": 0.3704, "step": 7315, "task_loss": 0.6262466311454773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34182438254356384, "epoch": 6.18, "learning_rate": 5.826590991426156e-06, "loss": 0.5061, "step": 7316, "task_loss": 0.377057284116745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5689165592193604, "epoch": 6.19, "learning_rate": 5.820553073300327e-06, "loss": 0.4447, "step": 7317, "task_loss": 0.24629873037338257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2638874650001526, "epoch": 6.19, "learning_rate": 5.814515155174496e-06, "loss": 0.3839, "step": 7318, "task_loss": 0.6169545650482178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.368215948343277, "epoch": 6.19, "learning_rate": 5.808477237048666e-06, "loss": 0.3961, "step": 7319, "task_loss": 0.7739343047142029 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43293672800064087, "epoch": 6.19, "learning_rate": 5.802439318922836e-06, "loss": 0.4008, "step": 7320, "task_loss": 1.3913404941558838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45702236890792847, "epoch": 6.19, "learning_rate": 5.796401400797006e-06, "loss": 0.5692, "step": 7321, "task_loss": 1.6856454610824585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24414291977882385, "epoch": 6.19, "learning_rate": 5.790363482671175e-06, "loss": 0.3822, "step": 7322, "task_loss": 0.7323386073112488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3668941855430603, "epoch": 6.19, "learning_rate": 5.784325564545345e-06, "loss": 0.4266, "step": 7323, "task_loss": 0.5207235813140869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38056740164756775, "epoch": 6.19, "learning_rate": 5.778287646419515e-06, "loss": 0.447, "step": 7324, "task_loss": 0.2720244824886322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4575008153915405, "epoch": 6.19, "learning_rate": 5.7722497282936845e-06, "loss": 0.4409, "step": 7325, "task_loss": 0.8203812837600708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22949141263961792, "epoch": 6.19, "learning_rate": 5.766211810167854e-06, "loss": 0.4594, "step": 7326, "task_loss": 0.1782011240720749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27222833037376404, "epoch": 6.19, "learning_rate": 5.760173892042024e-06, "loss": 0.4155, "step": 7327, "task_loss": 0.15903620421886444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24252432584762573, "epoch": 6.19, "learning_rate": 5.7541359739161945e-06, "loss": 0.394, "step": 7328, "task_loss": 0.5022997856140137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21282315254211426, "epoch": 6.2, "learning_rate": 5.748098055790363e-06, "loss": 0.4268, "step": 7329, "task_loss": 0.7104138731956482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4601905643939972, "epoch": 6.2, "learning_rate": 5.742060137664534e-06, "loss": 0.3699, "step": 7330, "task_loss": 0.3149878978729248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28273850679397583, "epoch": 6.2, "learning_rate": 5.736022219538703e-06, "loss": 0.3864, "step": 7331, "task_loss": 0.39543840289115906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7686933279037476, "epoch": 6.2, "learning_rate": 5.729984301412873e-06, "loss": 0.5822, "step": 7332, "task_loss": 0.7132221460342407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33279579877853394, "epoch": 6.2, "learning_rate": 5.723946383287043e-06, "loss": 0.3279, "step": 7333, "task_loss": 0.8187244534492493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36706727743148804, "epoch": 6.2, "learning_rate": 5.717908465161213e-06, "loss": 0.3978, "step": 7334, "task_loss": 0.58487868309021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3357492685317993, "epoch": 6.2, "learning_rate": 5.7118705470353826e-06, "loss": 0.3527, "step": 7335, "task_loss": 0.7118744254112244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2914299964904785, "epoch": 6.2, "learning_rate": 5.705832628909552e-06, "loss": 0.3204, "step": 7336, "task_loss": 0.05488895624876022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3607632517814636, "epoch": 6.2, "learning_rate": 5.699794710783722e-06, "loss": 0.3934, "step": 7337, "task_loss": 0.3483980894088745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5294084548950195, "epoch": 6.2, "learning_rate": 5.693756792657892e-06, "loss": 0.4286, "step": 7338, "task_loss": 1.129359483718872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.18070408701896667, "epoch": 6.2, "learning_rate": 5.6877188745320615e-06, "loss": 0.3092, "step": 7339, "task_loss": 0.5854549407958984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22348564863204956, "epoch": 6.2, "learning_rate": 5.681680956406232e-06, "loss": 0.3731, "step": 7340, "task_loss": 0.2520732879638672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3547956347465515, "epoch": 6.21, "learning_rate": 5.675643038280401e-06, "loss": 0.4684, "step": 7341, "task_loss": 0.5506937503814697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4159926772117615, "epoch": 6.21, "learning_rate": 5.6696051201545715e-06, "loss": 0.3774, "step": 7342, "task_loss": 0.7520624399185181 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2730810344219208, "epoch": 6.21, "learning_rate": 5.66356720202874e-06, "loss": 0.3755, "step": 7343, "task_loss": 0.18272829055786133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5668665170669556, "epoch": 6.21, "learning_rate": 5.65752928390291e-06, "loss": 0.5108, "step": 7344, "task_loss": 0.35754671692848206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.619672417640686, "epoch": 6.21, "learning_rate": 5.651491365777081e-06, "loss": 0.5321, "step": 7345, "task_loss": 0.7355321645736694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3953667879104614, "epoch": 6.21, "learning_rate": 5.6454534476512495e-06, "loss": 0.4635, "step": 7346, "task_loss": 0.4438496530056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3912051022052765, "epoch": 6.21, "learning_rate": 5.63941552952542e-06, "loss": 0.3678, "step": 7347, "task_loss": 0.6003245115280151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5242830514907837, "epoch": 6.21, "learning_rate": 5.633377611399589e-06, "loss": 0.5184, "step": 7348, "task_loss": 0.28880128264427185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6186342239379883, "epoch": 6.21, "learning_rate": 5.6273396932737596e-06, "loss": 0.4431, "step": 7349, "task_loss": 1.725752830505371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2898389399051666, "epoch": 6.21, "learning_rate": 5.621301775147929e-06, "loss": 0.3581, "step": 7350, "task_loss": 0.2267938256263733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35627055168151855, "epoch": 6.21, "learning_rate": 5.615263857022099e-06, "loss": 0.4487, "step": 7351, "task_loss": 1.0505273342132568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8235265016555786, "epoch": 6.21, "learning_rate": 5.609225938896269e-06, "loss": 0.5961, "step": 7352, "task_loss": 0.9141087532043457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6401113867759705, "epoch": 6.22, "learning_rate": 5.6031880207704385e-06, "loss": 0.5655, "step": 7353, "task_loss": 1.0912744998931885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7079514265060425, "epoch": 6.22, "learning_rate": 5.597150102644608e-06, "loss": 0.5564, "step": 7354, "task_loss": 0.8809816837310791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6462124586105347, "epoch": 6.22, "learning_rate": 5.591112184518779e-06, "loss": 0.4641, "step": 7355, "task_loss": 0.7900420427322388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3693937361240387, "epoch": 6.22, "learning_rate": 5.585074266392948e-06, "loss": 0.431, "step": 7356, "task_loss": 0.687492311000824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3564663529396057, "epoch": 6.22, "learning_rate": 5.579036348267118e-06, "loss": 0.3549, "step": 7357, "task_loss": 0.46581822633743286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3641672730445862, "epoch": 6.22, "learning_rate": 5.572998430141287e-06, "loss": 0.3712, "step": 7358, "task_loss": 0.41328558325767517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5339758396148682, "epoch": 6.22, "learning_rate": 5.566960512015458e-06, "loss": 0.427, "step": 7359, "task_loss": 0.92621248960495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4629404544830322, "epoch": 6.22, "learning_rate": 5.560922593889627e-06, "loss": 0.3534, "step": 7360, "task_loss": 0.6260806322097778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5174963474273682, "epoch": 6.22, "learning_rate": 5.554884675763797e-06, "loss": 0.4567, "step": 7361, "task_loss": 0.716735303401947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23456385731697083, "epoch": 6.22, "learning_rate": 5.548846757637967e-06, "loss": 0.331, "step": 7362, "task_loss": 0.4039887487888336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23929733037948608, "epoch": 6.22, "learning_rate": 5.5428088395121365e-06, "loss": 0.3706, "step": 7363, "task_loss": 0.5053170323371887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6820805668830872, "epoch": 6.22, "learning_rate": 5.536770921386306e-06, "loss": 0.4868, "step": 7364, "task_loss": 0.9453413486480713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6904684901237488, "epoch": 6.23, "learning_rate": 5.530733003260476e-06, "loss": 0.5795, "step": 7365, "task_loss": 0.8088664412498474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1944456696510315, "epoch": 6.23, "learning_rate": 5.524695085134646e-06, "loss": 0.4499, "step": 7366, "task_loss": 0.1197943240404129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3873312473297119, "epoch": 6.23, "learning_rate": 5.5186571670088154e-06, "loss": 0.365, "step": 7367, "task_loss": 0.5512471199035645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2314416915178299, "epoch": 6.23, "learning_rate": 5.512619248882985e-06, "loss": 0.3929, "step": 7368, "task_loss": 0.29062992334365845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33906030654907227, "epoch": 6.23, "learning_rate": 5.506581330757155e-06, "loss": 0.4155, "step": 7369, "task_loss": 0.5000306963920593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4262440800666809, "epoch": 6.23, "learning_rate": 5.5005434126313255e-06, "loss": 0.339, "step": 7370, "task_loss": 0.3971487283706665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46672385931015015, "epoch": 6.23, "learning_rate": 5.494505494505494e-06, "loss": 0.523, "step": 7371, "task_loss": 0.6195513606071472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41375917196273804, "epoch": 6.23, "learning_rate": 5.488467576379665e-06, "loss": 0.4771, "step": 7372, "task_loss": 0.30415114760398865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8488360643386841, "epoch": 6.23, "learning_rate": 5.482429658253834e-06, "loss": 0.5253, "step": 7373, "task_loss": 1.3912296295166016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4554328918457031, "epoch": 6.23, "learning_rate": 5.476391740128004e-06, "loss": 0.4001, "step": 7374, "task_loss": 0.4204336404800415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6580468416213989, "epoch": 6.23, "learning_rate": 5.470353822002174e-06, "loss": 0.4416, "step": 7375, "task_loss": 1.1220093965530396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3626665472984314, "epoch": 6.23, "learning_rate": 5.464315903876344e-06, "loss": 0.4647, "step": 7376, "task_loss": 1.1311931610107422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4704763889312744, "epoch": 6.24, "learning_rate": 5.4582779857505135e-06, "loss": 0.4677, "step": 7377, "task_loss": 0.6023044586181641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23187902569770813, "epoch": 6.24, "learning_rate": 5.452240067624683e-06, "loss": 0.6061, "step": 7378, "task_loss": 0.9020999670028687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3436601758003235, "epoch": 6.24, "learning_rate": 5.446202149498853e-06, "loss": 0.3869, "step": 7379, "task_loss": 0.4765997529029846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32063955068588257, "epoch": 6.24, "learning_rate": 5.440164231373023e-06, "loss": 0.4078, "step": 7380, "task_loss": 0.6190409064292908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3719885051250458, "epoch": 6.24, "learning_rate": 5.434126313247192e-06, "loss": 0.3994, "step": 7381, "task_loss": 0.9592141509056091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48282116651535034, "epoch": 6.24, "learning_rate": 5.428088395121363e-06, "loss": 0.4522, "step": 7382, "task_loss": 0.3770902752876282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3770049512386322, "epoch": 6.24, "learning_rate": 5.422050476995532e-06, "loss": 0.5106, "step": 7383, "task_loss": 1.098600149154663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32676392793655396, "epoch": 6.24, "learning_rate": 5.4160125588697024e-06, "loss": 0.4064, "step": 7384, "task_loss": 0.08062362670898438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5843770503997803, "epoch": 6.24, "learning_rate": 5.409974640743871e-06, "loss": 0.4086, "step": 7385, "task_loss": 0.6854328513145447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44204598665237427, "epoch": 6.24, "learning_rate": 5.403936722618042e-06, "loss": 0.3637, "step": 7386, "task_loss": 1.2379357814788818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3880566358566284, "epoch": 6.24, "learning_rate": 5.397898804492212e-06, "loss": 0.4337, "step": 7387, "task_loss": 1.2023465633392334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5054335594177246, "epoch": 6.24, "learning_rate": 5.391860886366381e-06, "loss": 0.5176, "step": 7388, "task_loss": 0.19818831980228424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3425913453102112, "epoch": 6.25, "learning_rate": 5.385822968240551e-06, "loss": 0.3719, "step": 7389, "task_loss": 0.4872191250324249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7701621055603027, "epoch": 6.25, "learning_rate": 5.379785050114721e-06, "loss": 0.495, "step": 7390, "task_loss": 0.26044097542762756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40795350074768066, "epoch": 6.25, "learning_rate": 5.3737471319888905e-06, "loss": 0.4239, "step": 7391, "task_loss": 0.5950875282287598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5454784631729126, "epoch": 6.25, "learning_rate": 5.36770921386306e-06, "loss": 0.3849, "step": 7392, "task_loss": 0.8058868050575256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6760454773902893, "epoch": 6.25, "learning_rate": 5.36167129573723e-06, "loss": 0.548, "step": 7393, "task_loss": 1.0544650554656982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22901886701583862, "epoch": 6.25, "learning_rate": 5.3556333776114e-06, "loss": 0.3495, "step": 7394, "task_loss": 0.5884392261505127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4032093584537506, "epoch": 6.25, "learning_rate": 5.349595459485569e-06, "loss": 0.3375, "step": 7395, "task_loss": 0.5870912075042725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31143710017204285, "epoch": 6.25, "learning_rate": 5.343557541359739e-06, "loss": 0.3784, "step": 7396, "task_loss": 0.1689995974302292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5785366296768188, "epoch": 6.25, "learning_rate": 5.33751962323391e-06, "loss": 0.3605, "step": 7397, "task_loss": 0.9896876811981201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5109443664550781, "epoch": 6.25, "learning_rate": 5.3314817051080786e-06, "loss": 0.4661, "step": 7398, "task_loss": 0.04805897921323776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6356993317604065, "epoch": 6.25, "learning_rate": 5.325443786982249e-06, "loss": 0.4749, "step": 7399, "task_loss": 1.2756054401397705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2623715102672577, "epoch": 6.26, "learning_rate": 5.319405868856418e-06, "loss": 0.3826, "step": 7400, "task_loss": 0.6776277422904968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5783810615539551, "epoch": 6.26, "learning_rate": 5.313367950730589e-06, "loss": 0.4556, "step": 7401, "task_loss": 0.336563378572464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.356137752532959, "epoch": 6.26, "learning_rate": 5.307330032604758e-06, "loss": 0.3666, "step": 7402, "task_loss": 0.3089883029460907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.16587617993354797, "epoch": 6.26, "learning_rate": 5.301292114478928e-06, "loss": 0.25, "step": 7403, "task_loss": 0.369858056306839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6136224269866943, "epoch": 6.26, "learning_rate": 5.295254196353098e-06, "loss": 0.4745, "step": 7404, "task_loss": 0.1862764209508896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.336365669965744, "epoch": 6.26, "learning_rate": 5.2892162782272675e-06, "loss": 0.4337, "step": 7405, "task_loss": 0.5411155223846436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4495391249656677, "epoch": 6.26, "learning_rate": 5.283178360101437e-06, "loss": 0.4121, "step": 7406, "task_loss": 0.7356066703796387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31159916520118713, "epoch": 6.26, "learning_rate": 5.277140441975608e-06, "loss": 0.3717, "step": 7407, "task_loss": 0.44125157594680786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40781551599502563, "epoch": 6.26, "learning_rate": 5.271102523849777e-06, "loss": 0.4688, "step": 7408, "task_loss": 0.38573363423347473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.556952714920044, "epoch": 6.26, "learning_rate": 5.265064605723946e-06, "loss": 0.4247, "step": 7409, "task_loss": 0.6565746665000916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7109231948852539, "epoch": 6.26, "learning_rate": 5.259026687598116e-06, "loss": 0.6595, "step": 7410, "task_loss": 0.9191327691078186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26015159487724304, "epoch": 6.26, "learning_rate": 5.252988769472286e-06, "loss": 0.353, "step": 7411, "task_loss": 0.4643987715244293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2671029567718506, "epoch": 6.27, "learning_rate": 5.246950851346456e-06, "loss": 0.3617, "step": 7412, "task_loss": 0.6312130093574524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5368250012397766, "epoch": 6.27, "learning_rate": 5.240912933220625e-06, "loss": 0.6017, "step": 7413, "task_loss": 1.2658514976501465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2623820900917053, "epoch": 6.27, "learning_rate": 5.234875015094796e-06, "loss": 0.3661, "step": 7414, "task_loss": 0.4086884558200836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38856595754623413, "epoch": 6.27, "learning_rate": 5.228837096968965e-06, "loss": 0.3919, "step": 7415, "task_loss": 0.31083202362060547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5060527920722961, "epoch": 6.27, "learning_rate": 5.222799178843135e-06, "loss": 0.6069, "step": 7416, "task_loss": 1.4717689752578735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42010295391082764, "epoch": 6.27, "learning_rate": 5.216761260717305e-06, "loss": 0.4907, "step": 7417, "task_loss": 0.5572549700737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4489558935165405, "epoch": 6.27, "learning_rate": 5.210723342591475e-06, "loss": 0.38, "step": 7418, "task_loss": 0.1304665505886078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30602511763572693, "epoch": 6.27, "learning_rate": 5.2046854244656445e-06, "loss": 0.4126, "step": 7419, "task_loss": 1.026025652885437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6032537221908569, "epoch": 6.27, "learning_rate": 5.198647506339814e-06, "loss": 0.5059, "step": 7420, "task_loss": 1.0757315158843994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2799069881439209, "epoch": 6.27, "learning_rate": 5.192609588213984e-06, "loss": 0.4706, "step": 7421, "task_loss": 0.8361231684684753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.18934589624404907, "epoch": 6.27, "learning_rate": 5.1865716700881545e-06, "loss": 0.3557, "step": 7422, "task_loss": 0.09834219515323639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35489460825920105, "epoch": 6.27, "learning_rate": 5.180533751962323e-06, "loss": 0.2697, "step": 7423, "task_loss": 0.2501721978187561 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45717719197273254, "epoch": 6.28, "learning_rate": 5.174495833836494e-06, "loss": 0.3825, "step": 7424, "task_loss": 0.4972097873687744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43969860672950745, "epoch": 6.28, "learning_rate": 5.168457915710663e-06, "loss": 0.4208, "step": 7425, "task_loss": 1.141324520111084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2302996814250946, "epoch": 6.28, "learning_rate": 5.162419997584833e-06, "loss": 0.32, "step": 7426, "task_loss": 0.3124285340309143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3646153509616852, "epoch": 6.28, "learning_rate": 5.156382079459002e-06, "loss": 0.3812, "step": 7427, "task_loss": 0.6510748863220215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21820974349975586, "epoch": 6.28, "learning_rate": 5.150344161333173e-06, "loss": 0.3455, "step": 7428, "task_loss": 0.30267438292503357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34554946422576904, "epoch": 6.28, "learning_rate": 5.1443062432073426e-06, "loss": 0.3494, "step": 7429, "task_loss": 0.1413135528564453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1820530742406845, "epoch": 6.28, "learning_rate": 5.138268325081512e-06, "loss": 0.3375, "step": 7430, "task_loss": 0.3372766375541687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6225518584251404, "epoch": 6.28, "learning_rate": 5.132230406955682e-06, "loss": 0.493, "step": 7431, "task_loss": 0.4200439155101776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5159047842025757, "epoch": 6.28, "learning_rate": 5.126192488829852e-06, "loss": 0.4264, "step": 7432, "task_loss": 0.4203190505504608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1653871089220047, "epoch": 6.28, "learning_rate": 5.1201545707040215e-06, "loss": 0.3187, "step": 7433, "task_loss": 0.36211875081062317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.659524142742157, "epoch": 6.28, "learning_rate": 5.114116652578191e-06, "loss": 0.5061, "step": 7434, "task_loss": 0.1781531274318695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2806876599788666, "epoch": 6.28, "learning_rate": 5.108078734452361e-06, "loss": 0.4059, "step": 7435, "task_loss": 0.06963673233985901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2963976263999939, "epoch": 6.29, "learning_rate": 5.102040816326531e-06, "loss": 0.3719, "step": 7436, "task_loss": 0.021247699856758118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4363195300102234, "epoch": 6.29, "learning_rate": 5.0960028982007e-06, "loss": 0.4445, "step": 7437, "task_loss": 0.4809010624885559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8268147706985474, "epoch": 6.29, "learning_rate": 5.08996498007487e-06, "loss": 0.646, "step": 7438, "task_loss": 0.3905881941318512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6067918539047241, "epoch": 6.29, "learning_rate": 5.083927061949041e-06, "loss": 0.4475, "step": 7439, "task_loss": 0.2572454810142517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4973216652870178, "epoch": 6.29, "learning_rate": 5.0778891438232095e-06, "loss": 0.3856, "step": 7440, "task_loss": 1.0486277341842651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35328763723373413, "epoch": 6.29, "learning_rate": 5.07185122569738e-06, "loss": 0.4463, "step": 7441, "task_loss": 0.6866239905357361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3136610686779022, "epoch": 6.29, "learning_rate": 5.065813307571549e-06, "loss": 0.3996, "step": 7442, "task_loss": 1.1094201803207397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5310568809509277, "epoch": 6.29, "learning_rate": 5.0597753894457195e-06, "loss": 0.4816, "step": 7443, "task_loss": 0.9902428388595581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47853708267211914, "epoch": 6.29, "learning_rate": 5.053737471319889e-06, "loss": 0.4335, "step": 7444, "task_loss": 0.041457246989011765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3184778094291687, "epoch": 6.29, "learning_rate": 5.047699553194059e-06, "loss": 0.5052, "step": 7445, "task_loss": 0.24103352427482605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2613599896430969, "epoch": 6.29, "learning_rate": 5.041661635068229e-06, "loss": 0.2967, "step": 7446, "task_loss": 0.11688145995140076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29496461153030396, "epoch": 6.29, "learning_rate": 5.0356237169423984e-06, "loss": 0.4397, "step": 7447, "task_loss": 0.34415164589881897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7145918011665344, "epoch": 6.3, "learning_rate": 5.029585798816568e-06, "loss": 0.5131, "step": 7448, "task_loss": 0.9403545260429382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5072611570358276, "epoch": 6.3, "learning_rate": 5.023547880690739e-06, "loss": 0.4163, "step": 7449, "task_loss": 0.8284904360771179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3969079554080963, "epoch": 6.3, "learning_rate": 5.017509962564908e-06, "loss": 0.4522, "step": 7450, "task_loss": 0.8418602347373962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49214908480644226, "epoch": 6.3, "learning_rate": 5.011472044439078e-06, "loss": 0.4181, "step": 7451, "task_loss": 0.8271406292915344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45256391167640686, "epoch": 6.3, "learning_rate": 5.005434126313247e-06, "loss": 0.4629, "step": 7452, "task_loss": 0.051237862557172775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4098917841911316, "epoch": 6.3, "learning_rate": 4.999396208187418e-06, "loss": 0.4457, "step": 7453, "task_loss": 0.36419373750686646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43007057905197144, "epoch": 6.3, "learning_rate": 4.993358290061587e-06, "loss": 0.455, "step": 7454, "task_loss": 0.7757449150085449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5634856224060059, "epoch": 6.3, "learning_rate": 4.987320371935757e-06, "loss": 0.4573, "step": 7455, "task_loss": 1.137110948562622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32444557547569275, "epoch": 6.3, "learning_rate": 4.981282453809927e-06, "loss": 0.366, "step": 7456, "task_loss": 0.2734954059123993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.314064621925354, "epoch": 6.3, "learning_rate": 4.9752445356840965e-06, "loss": 0.3964, "step": 7457, "task_loss": 1.006845474243164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36465707421302795, "epoch": 6.3, "learning_rate": 4.969206617558266e-06, "loss": 0.4777, "step": 7458, "task_loss": 0.6425678133964539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4745516777038574, "epoch": 6.3, "learning_rate": 4.963168699432436e-06, "loss": 0.493, "step": 7459, "task_loss": 0.8187721371650696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38694560527801514, "epoch": 6.31, "learning_rate": 4.957130781306606e-06, "loss": 0.3767, "step": 7460, "task_loss": 0.48382118344306946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3386395275592804, "epoch": 6.31, "learning_rate": 4.9510928631807754e-06, "loss": 0.3588, "step": 7461, "task_loss": 0.7395762801170349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49550771713256836, "epoch": 6.31, "learning_rate": 4.945054945054945e-06, "loss": 0.4234, "step": 7462, "task_loss": 0.3755562901496887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.20921988785266876, "epoch": 6.31, "learning_rate": 4.939017026929115e-06, "loss": 0.5432, "step": 7463, "task_loss": 0.10890568792819977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2143513560295105, "epoch": 6.31, "learning_rate": 4.9329791088032854e-06, "loss": 0.3109, "step": 7464, "task_loss": 0.787300705909729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32300275564193726, "epoch": 6.31, "learning_rate": 4.926941190677454e-06, "loss": 0.4159, "step": 7465, "task_loss": 0.5513202548027039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.261161744594574, "epoch": 6.31, "learning_rate": 4.920903272551625e-06, "loss": 0.5107, "step": 7466, "task_loss": 0.6095837950706482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28015199303627014, "epoch": 6.31, "learning_rate": 4.914865354425794e-06, "loss": 0.3888, "step": 7467, "task_loss": 0.5899730324745178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4837396442890167, "epoch": 6.31, "learning_rate": 4.908827436299964e-06, "loss": 0.4278, "step": 7468, "task_loss": 0.675345778465271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2367822527885437, "epoch": 6.31, "learning_rate": 4.902789518174133e-06, "loss": 0.4057, "step": 7469, "task_loss": 0.14069326221942902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29746514558792114, "epoch": 6.31, "learning_rate": 4.896751600048304e-06, "loss": 0.4894, "step": 7470, "task_loss": 0.21853554248809814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42051124572753906, "epoch": 6.32, "learning_rate": 4.8907136819224735e-06, "loss": 0.4628, "step": 7471, "task_loss": 0.528052568435669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35436365008354187, "epoch": 6.32, "learning_rate": 4.884675763796643e-06, "loss": 0.3303, "step": 7472, "task_loss": 0.4779060184955597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2609892785549164, "epoch": 6.32, "learning_rate": 4.878637845670813e-06, "loss": 0.4379, "step": 7473, "task_loss": 0.723876953125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6016688346862793, "epoch": 6.32, "learning_rate": 4.872599927544983e-06, "loss": 0.4576, "step": 7474, "task_loss": 0.7880899310112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3632658123970032, "epoch": 6.32, "learning_rate": 4.866562009419152e-06, "loss": 0.6327, "step": 7475, "task_loss": 0.7446721792221069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7677070498466492, "epoch": 6.32, "learning_rate": 4.860524091293322e-06, "loss": 0.4704, "step": 7476, "task_loss": 0.3463546633720398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38483577966690063, "epoch": 6.32, "learning_rate": 4.854486173167492e-06, "loss": 0.3704, "step": 7477, "task_loss": 0.17069073021411896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3742620646953583, "epoch": 6.32, "learning_rate": 4.848448255041662e-06, "loss": 0.3999, "step": 7478, "task_loss": 0.6305133700370789 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6221630573272705, "epoch": 6.32, "learning_rate": 4.842410336915831e-06, "loss": 0.3871, "step": 7479, "task_loss": 0.5501518249511719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27786779403686523, "epoch": 6.32, "learning_rate": 4.836372418790001e-06, "loss": 0.3501, "step": 7480, "task_loss": 1.025245189666748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2813095450401306, "epoch": 6.32, "learning_rate": 4.830334500664172e-06, "loss": 0.3635, "step": 7481, "task_loss": 0.374921053647995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2870813310146332, "epoch": 6.32, "learning_rate": 4.8242965825383405e-06, "loss": 0.4124, "step": 7482, "task_loss": 0.5259470343589783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4657238721847534, "epoch": 6.33, "learning_rate": 4.818258664412511e-06, "loss": 0.4062, "step": 7483, "task_loss": 0.9027307033538818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32433903217315674, "epoch": 6.33, "learning_rate": 4.81222074628668e-06, "loss": 0.3874, "step": 7484, "task_loss": 0.7425508499145508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7295410633087158, "epoch": 6.33, "learning_rate": 4.8061828281608505e-06, "loss": 0.4008, "step": 7485, "task_loss": 0.6107948422431946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2529045343399048, "epoch": 6.33, "learning_rate": 4.80014491003502e-06, "loss": 0.3821, "step": 7486, "task_loss": 0.45456260442733765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45728346705436707, "epoch": 6.33, "learning_rate": 4.79410699190919e-06, "loss": 0.4356, "step": 7487, "task_loss": 0.8311693072319031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23208969831466675, "epoch": 6.33, "learning_rate": 4.78806907378336e-06, "loss": 0.3519, "step": 7488, "task_loss": 0.12503524124622345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2905499339103699, "epoch": 6.33, "learning_rate": 4.782031155657529e-06, "loss": 0.3704, "step": 7489, "task_loss": 0.023636208847165108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47241345047950745, "epoch": 6.33, "learning_rate": 4.775993237531699e-06, "loss": 0.5011, "step": 7490, "task_loss": 0.22911174595355988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28566616773605347, "epoch": 6.33, "learning_rate": 4.76995531940587e-06, "loss": 0.479, "step": 7491, "task_loss": 0.42829686403274536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31720277667045593, "epoch": 6.33, "learning_rate": 4.7639174012800386e-06, "loss": 0.4517, "step": 7492, "task_loss": 0.42388537526130676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3090691566467285, "epoch": 6.33, "learning_rate": 4.757879483154209e-06, "loss": 0.4426, "step": 7493, "task_loss": 0.5193140506744385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4583055078983307, "epoch": 6.33, "learning_rate": 4.751841565028378e-06, "loss": 0.3626, "step": 7494, "task_loss": 0.5420424342155457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2785313129425049, "epoch": 6.34, "learning_rate": 4.745803646902549e-06, "loss": 0.4094, "step": 7495, "task_loss": 0.8193860650062561 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41292303800582886, "epoch": 6.34, "learning_rate": 4.739765728776718e-06, "loss": 0.3711, "step": 7496, "task_loss": 0.7001169919967651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4504171311855316, "epoch": 6.34, "learning_rate": 4.733727810650888e-06, "loss": 0.4295, "step": 7497, "task_loss": 0.5550360083580017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3314458727836609, "epoch": 6.34, "learning_rate": 4.727689892525058e-06, "loss": 0.4695, "step": 7498, "task_loss": 0.4536001682281494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5099450349807739, "epoch": 6.34, "learning_rate": 4.7216519743992275e-06, "loss": 0.475, "step": 7499, "task_loss": 0.61962890625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48199573159217834, "epoch": 6.34, "learning_rate": 4.715614056273397e-06, "loss": 0.4386, "step": 7500, "task_loss": 0.7066032290458679 }, { "epoch": 6.34, "eval_accuracy": 0.9112079207920792, "eval_loss": 0.273820161819458, "eval_runtime": 227.7817, "eval_samples_per_second": 110.852, "eval_steps_per_second": 0.869, "step": 7500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3561236560344696, "epoch": 6.34, "learning_rate": 4.709576138147567e-06, "loss": 0.322, "step": 7501, "task_loss": 1.0204592943191528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37394022941589355, "epoch": 6.34, "learning_rate": 4.703538220021737e-06, "loss": 0.4309, "step": 7502, "task_loss": 0.9365481734275818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27706557512283325, "epoch": 6.34, "learning_rate": 4.697500301895906e-06, "loss": 0.3313, "step": 7503, "task_loss": 0.7739271521568298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.16937577724456787, "epoch": 6.34, "learning_rate": 4.691462383770076e-06, "loss": 0.3784, "step": 7504, "task_loss": 0.14141735434532166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.532852292060852, "epoch": 6.34, "learning_rate": 4.685424465644246e-06, "loss": 0.439, "step": 7505, "task_loss": 0.26980021595954895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5000603795051575, "epoch": 6.34, "learning_rate": 4.679386547518416e-06, "loss": 0.4344, "step": 7506, "task_loss": 0.7374952435493469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30799609422683716, "epoch": 6.35, "learning_rate": 4.673348629392585e-06, "loss": 0.4124, "step": 7507, "task_loss": 0.09331848472356796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27996543049812317, "epoch": 6.35, "learning_rate": 4.667310711266756e-06, "loss": 0.3709, "step": 7508, "task_loss": 0.9448261857032776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.20915837585926056, "epoch": 6.35, "learning_rate": 4.661272793140925e-06, "loss": 0.3654, "step": 7509, "task_loss": 0.1709122359752655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49053695797920227, "epoch": 6.35, "learning_rate": 4.655234875015095e-06, "loss": 0.4347, "step": 7510, "task_loss": 0.6704981327056885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32792824506759644, "epoch": 6.35, "learning_rate": 4.649196956889264e-06, "loss": 0.4019, "step": 7511, "task_loss": 0.8451147079467773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37072181701660156, "epoch": 6.35, "learning_rate": 4.643159038763435e-06, "loss": 0.3631, "step": 7512, "task_loss": 0.15844447910785675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25223928689956665, "epoch": 6.35, "learning_rate": 4.6371211206376045e-06, "loss": 0.3173, "step": 7513, "task_loss": 0.38770201802253723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47233253717422485, "epoch": 6.35, "learning_rate": 4.631083202511774e-06, "loss": 0.4189, "step": 7514, "task_loss": 0.37192830443382263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2528648376464844, "epoch": 6.35, "learning_rate": 4.625045284385944e-06, "loss": 0.3386, "step": 7515, "task_loss": 0.5089412927627563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.335831880569458, "epoch": 6.35, "learning_rate": 4.619007366260114e-06, "loss": 0.3753, "step": 7516, "task_loss": 0.6398341655731201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33378735184669495, "epoch": 6.35, "learning_rate": 4.612969448134283e-06, "loss": 0.3604, "step": 7517, "task_loss": 0.20500139892101288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3438367247581482, "epoch": 6.35, "learning_rate": 4.606931530008454e-06, "loss": 0.4045, "step": 7518, "task_loss": 0.36127787828445435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37273192405700684, "epoch": 6.36, "learning_rate": 4.600893611882623e-06, "loss": 0.4005, "step": 7519, "task_loss": 0.26467981934547424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48539233207702637, "epoch": 6.36, "learning_rate": 4.594855693756793e-06, "loss": 0.6214, "step": 7520, "task_loss": 0.585095226764679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37473398447036743, "epoch": 6.36, "learning_rate": 4.588817775630962e-06, "loss": 0.4132, "step": 7521, "task_loss": 0.7003822326660156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4969085156917572, "epoch": 6.36, "learning_rate": 4.582779857505133e-06, "loss": 0.5346, "step": 7522, "task_loss": 1.1036744117736816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.19755706191062927, "epoch": 6.36, "learning_rate": 4.5767419393793026e-06, "loss": 0.3701, "step": 7523, "task_loss": 0.04986334592103958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39888885617256165, "epoch": 6.36, "learning_rate": 4.570704021253472e-06, "loss": 0.4021, "step": 7524, "task_loss": 0.4452453553676605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3544149100780487, "epoch": 6.36, "learning_rate": 4.564666103127642e-06, "loss": 0.5224, "step": 7525, "task_loss": 0.6101288199424744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3820301592350006, "epoch": 6.36, "learning_rate": 4.558628185001811e-06, "loss": 0.4929, "step": 7526, "task_loss": 1.1957510709762573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30141013860702515, "epoch": 6.36, "learning_rate": 4.5525902668759815e-06, "loss": 0.3253, "step": 7527, "task_loss": 0.08777723461389542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3219088613986969, "epoch": 6.36, "learning_rate": 4.546552348750151e-06, "loss": 0.4185, "step": 7528, "task_loss": 0.29596471786499023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32884734869003296, "epoch": 6.36, "learning_rate": 4.540514430624321e-06, "loss": 0.425, "step": 7529, "task_loss": 0.9923633933067322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49735143780708313, "epoch": 6.36, "learning_rate": 4.534476512498491e-06, "loss": 0.5002, "step": 7530, "task_loss": 0.2476939857006073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1843915730714798, "epoch": 6.37, "learning_rate": 4.52843859437266e-06, "loss": 0.3579, "step": 7531, "task_loss": 0.24017155170440674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2773257791996002, "epoch": 6.37, "learning_rate": 4.52240067624683e-06, "loss": 0.4201, "step": 7532, "task_loss": 0.8239900469779968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5478127598762512, "epoch": 6.37, "learning_rate": 4.516362758121001e-06, "loss": 0.5481, "step": 7533, "task_loss": 0.3263891339302063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33114004135131836, "epoch": 6.37, "learning_rate": 4.5103248399951695e-06, "loss": 0.385, "step": 7534, "task_loss": 0.41055619716644287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4305839240550995, "epoch": 6.37, "learning_rate": 4.50428692186934e-06, "loss": 0.3887, "step": 7535, "task_loss": 0.6155098080635071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3318480849266052, "epoch": 6.37, "learning_rate": 4.498249003743509e-06, "loss": 0.4037, "step": 7536, "task_loss": 0.1411130428314209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22847118973731995, "epoch": 6.37, "learning_rate": 4.4922110856176795e-06, "loss": 0.3806, "step": 7537, "task_loss": 0.18115925788879395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4208347499370575, "epoch": 6.37, "learning_rate": 4.486173167491849e-06, "loss": 0.474, "step": 7538, "task_loss": 0.9093618392944336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27996793389320374, "epoch": 6.37, "learning_rate": 4.480135249366019e-06, "loss": 0.5177, "step": 7539, "task_loss": 0.9155387878417969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3441300392150879, "epoch": 6.37, "learning_rate": 4.474097331240189e-06, "loss": 0.3253, "step": 7540, "task_loss": 0.32349810004234314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24208030104637146, "epoch": 6.37, "learning_rate": 4.4680594131143584e-06, "loss": 0.4382, "step": 7541, "task_loss": 0.264790803194046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2643689215183258, "epoch": 6.38, "learning_rate": 4.462021494988528e-06, "loss": 0.3755, "step": 7542, "task_loss": 0.27015185356140137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.368619441986084, "epoch": 6.38, "learning_rate": 4.455983576862698e-06, "loss": 0.3966, "step": 7543, "task_loss": 0.6519206762313843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3943847417831421, "epoch": 6.38, "learning_rate": 4.449945658736868e-06, "loss": 0.4394, "step": 7544, "task_loss": 1.2400013208389282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28756478428840637, "epoch": 6.38, "learning_rate": 4.443907740611037e-06, "loss": 0.3717, "step": 7545, "task_loss": 0.7410237193107605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21646368503570557, "epoch": 6.38, "learning_rate": 4.437869822485207e-06, "loss": 0.3485, "step": 7546, "task_loss": 0.08190867304801941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35251379013061523, "epoch": 6.38, "learning_rate": 4.431831904359377e-06, "loss": 0.4489, "step": 7547, "task_loss": 1.0965979099273682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49258944392204285, "epoch": 6.38, "learning_rate": 4.425793986233547e-06, "loss": 0.4465, "step": 7548, "task_loss": 0.7132380604743958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6008623838424683, "epoch": 6.38, "learning_rate": 4.419756068107716e-06, "loss": 0.4425, "step": 7549, "task_loss": 0.3820526599884033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.13495850563049316, "epoch": 6.38, "learning_rate": 4.413718149981887e-06, "loss": 0.2814, "step": 7550, "task_loss": 0.18869565427303314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4906533360481262, "epoch": 6.38, "learning_rate": 4.407680231856056e-06, "loss": 0.4471, "step": 7551, "task_loss": 1.428904414176941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48705172538757324, "epoch": 6.38, "learning_rate": 4.401642313730226e-06, "loss": 0.3429, "step": 7552, "task_loss": 0.20902010798454285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.392392635345459, "epoch": 6.38, "learning_rate": 4.395604395604396e-06, "loss": 0.2906, "step": 7553, "task_loss": 1.0537656545639038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2439027726650238, "epoch": 6.39, "learning_rate": 4.389566477478566e-06, "loss": 0.4182, "step": 7554, "task_loss": 1.2260949611663818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5100665092468262, "epoch": 6.39, "learning_rate": 4.383528559352735e-06, "loss": 0.4576, "step": 7555, "task_loss": 0.9553603529930115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3274306654930115, "epoch": 6.39, "learning_rate": 4.377490641226905e-06, "loss": 0.4924, "step": 7556, "task_loss": 0.18308515846729279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4941861629486084, "epoch": 6.39, "learning_rate": 4.371452723101075e-06, "loss": 0.519, "step": 7557, "task_loss": 0.44950351119041443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3427702486515045, "epoch": 6.39, "learning_rate": 4.365414804975245e-06, "loss": 0.4269, "step": 7558, "task_loss": 0.6943057775497437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2340335249900818, "epoch": 6.39, "learning_rate": 4.359376886849414e-06, "loss": 0.3883, "step": 7559, "task_loss": 0.8555739521980286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6850919723510742, "epoch": 6.39, "learning_rate": 4.353338968723585e-06, "loss": 0.4789, "step": 7560, "task_loss": 0.5528737306594849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3963630497455597, "epoch": 6.39, "learning_rate": 4.347301050597754e-06, "loss": 0.4623, "step": 7561, "task_loss": 1.5189728736877441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6989535093307495, "epoch": 6.39, "learning_rate": 4.341263132471924e-06, "loss": 0.3597, "step": 7562, "task_loss": 0.6105638742446899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26403138041496277, "epoch": 6.39, "learning_rate": 4.335225214346093e-06, "loss": 0.4618, "step": 7563, "task_loss": 0.15609999001026154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5988813638687134, "epoch": 6.39, "learning_rate": 4.329187296220264e-06, "loss": 0.5615, "step": 7564, "task_loss": 0.2901882827281952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47591882944107056, "epoch": 6.39, "learning_rate": 4.3231493780944335e-06, "loss": 0.4794, "step": 7565, "task_loss": 0.6742881536483765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5205352306365967, "epoch": 6.4, "learning_rate": 4.317111459968603e-06, "loss": 0.4945, "step": 7566, "task_loss": 0.741621196269989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38645514845848083, "epoch": 6.4, "learning_rate": 4.311073541842773e-06, "loss": 0.3531, "step": 7567, "task_loss": 0.6004335880279541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5187391042709351, "epoch": 6.4, "learning_rate": 4.305035623716943e-06, "loss": 0.4456, "step": 7568, "task_loss": 0.7985501885414124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46640944480895996, "epoch": 6.4, "learning_rate": 4.298997705591112e-06, "loss": 0.5245, "step": 7569, "task_loss": 0.19216766953468323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35804954171180725, "epoch": 6.4, "learning_rate": 4.292959787465282e-06, "loss": 0.4729, "step": 7570, "task_loss": 0.8343668580055237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3087470531463623, "epoch": 6.4, "learning_rate": 4.286921869339452e-06, "loss": 0.4095, "step": 7571, "task_loss": 0.1514779031276703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.557770311832428, "epoch": 6.4, "learning_rate": 4.280883951213622e-06, "loss": 0.482, "step": 7572, "task_loss": 1.0636582374572754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35276347398757935, "epoch": 6.4, "learning_rate": 4.274846033087791e-06, "loss": 0.4895, "step": 7573, "task_loss": 0.872612714767456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32482096552848816, "epoch": 6.4, "learning_rate": 4.268808114961961e-06, "loss": 0.438, "step": 7574, "task_loss": 1.0699458122253418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5266396999359131, "epoch": 6.4, "learning_rate": 4.262770196836132e-06, "loss": 0.5185, "step": 7575, "task_loss": 1.4329333305358887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5066378116607666, "epoch": 6.4, "learning_rate": 4.2567322787103005e-06, "loss": 0.4627, "step": 7576, "task_loss": 0.16584105789661407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3045175075531006, "epoch": 6.4, "learning_rate": 4.250694360584471e-06, "loss": 0.3664, "step": 7577, "task_loss": 0.1249660849571228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.17197775840759277, "epoch": 6.41, "learning_rate": 4.24465644245864e-06, "loss": 0.3831, "step": 7578, "task_loss": 0.28768932819366455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2579072117805481, "epoch": 6.41, "learning_rate": 4.2386185243328105e-06, "loss": 0.3706, "step": 7579, "task_loss": 0.6752832531929016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6407304406166077, "epoch": 6.41, "learning_rate": 4.23258060620698e-06, "loss": 0.4383, "step": 7580, "task_loss": 0.33479073643684387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6379284858703613, "epoch": 6.41, "learning_rate": 4.22654268808115e-06, "loss": 0.4051, "step": 7581, "task_loss": 1.0305145978927612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39257991313934326, "epoch": 6.41, "learning_rate": 4.22050476995532e-06, "loss": 0.3742, "step": 7582, "task_loss": 0.3103486895561218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36527305841445923, "epoch": 6.41, "learning_rate": 4.214466851829489e-06, "loss": 0.4225, "step": 7583, "task_loss": 0.759556770324707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2443835288286209, "epoch": 6.41, "learning_rate": 4.208428933703659e-06, "loss": 0.3774, "step": 7584, "task_loss": 0.44712769985198975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33049914240837097, "epoch": 6.41, "learning_rate": 4.20239101557783e-06, "loss": 0.35, "step": 7585, "task_loss": 0.5728400349617004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9545589685440063, "epoch": 6.41, "learning_rate": 4.1963530974519986e-06, "loss": 0.5458, "step": 7586, "task_loss": 1.572511911392212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5978804230690002, "epoch": 6.41, "learning_rate": 4.190315179326169e-06, "loss": 0.524, "step": 7587, "task_loss": 1.3174279928207397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5475623607635498, "epoch": 6.41, "learning_rate": 4.184277261200338e-06, "loss": 0.4915, "step": 7588, "task_loss": 0.8397957682609558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6554060578346252, "epoch": 6.41, "learning_rate": 4.178239343074508e-06, "loss": 0.5275, "step": 7589, "task_loss": 1.1806100606918335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4321490228176117, "epoch": 6.42, "learning_rate": 4.172201424948678e-06, "loss": 0.3494, "step": 7590, "task_loss": 0.1076197698712349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37346410751342773, "epoch": 6.42, "learning_rate": 4.166163506822847e-06, "loss": 0.3886, "step": 7591, "task_loss": 0.7234314680099487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2808346152305603, "epoch": 6.42, "learning_rate": 4.160125588697018e-06, "loss": 0.4969, "step": 7592, "task_loss": 0.30329838395118713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4248100519180298, "epoch": 6.42, "learning_rate": 4.154087670571187e-06, "loss": 0.4096, "step": 7593, "task_loss": 1.4562383890151978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5851520895957947, "epoch": 6.42, "learning_rate": 4.148049752445357e-06, "loss": 0.4263, "step": 7594, "task_loss": 0.4535808563232422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3406076431274414, "epoch": 6.42, "learning_rate": 4.142011834319527e-06, "loss": 0.3837, "step": 7595, "task_loss": 0.698113203048706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40253448486328125, "epoch": 6.42, "learning_rate": 4.135973916193697e-06, "loss": 0.4029, "step": 7596, "task_loss": 0.19418777525424957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3850042223930359, "epoch": 6.42, "learning_rate": 4.129935998067866e-06, "loss": 0.4169, "step": 7597, "task_loss": 0.2745683193206787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.566947340965271, "epoch": 6.42, "learning_rate": 4.123898079942036e-06, "loss": 0.4363, "step": 7598, "task_loss": 0.16471554338932037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2935982346534729, "epoch": 6.42, "learning_rate": 4.117860161816206e-06, "loss": 0.3777, "step": 7599, "task_loss": 1.0851373672485352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6052749752998352, "epoch": 6.42, "learning_rate": 4.1118222436903755e-06, "loss": 0.4096, "step": 7600, "task_loss": 0.6631654500961304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36230379343032837, "epoch": 6.42, "learning_rate": 4.105784325564545e-06, "loss": 0.2496, "step": 7601, "task_loss": 1.0661555528640747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.514445424079895, "epoch": 6.43, "learning_rate": 4.099746407438716e-06, "loss": 0.3579, "step": 7602, "task_loss": 0.27308452129364014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2838478684425354, "epoch": 6.43, "learning_rate": 4.093708489312885e-06, "loss": 0.454, "step": 7603, "task_loss": 0.3346703350543976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21703946590423584, "epoch": 6.43, "learning_rate": 4.087670571187055e-06, "loss": 0.3625, "step": 7604, "task_loss": 0.6335638165473938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31049904227256775, "epoch": 6.43, "learning_rate": 4.081632653061224e-06, "loss": 0.3488, "step": 7605, "task_loss": 0.18147899210453033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.16660843789577484, "epoch": 6.43, "learning_rate": 4.075594734935395e-06, "loss": 0.3058, "step": 7606, "task_loss": 0.555975615978241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36628010869026184, "epoch": 6.43, "learning_rate": 4.0695568168095645e-06, "loss": 0.3826, "step": 7607, "task_loss": 0.4056656062602997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36825108528137207, "epoch": 6.43, "learning_rate": 4.063518898683734e-06, "loss": 0.429, "step": 7608, "task_loss": 1.3975794315338135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.254421591758728, "epoch": 6.43, "learning_rate": 4.057480980557904e-06, "loss": 0.454, "step": 7609, "task_loss": 1.0104660987854004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6730448603630066, "epoch": 6.43, "learning_rate": 4.051443062432074e-06, "loss": 0.377, "step": 7610, "task_loss": 0.6257966160774231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46320852637290955, "epoch": 6.43, "learning_rate": 4.045405144306243e-06, "loss": 0.3921, "step": 7611, "task_loss": 0.8142383694648743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.264749139547348, "epoch": 6.43, "learning_rate": 4.039367226180413e-06, "loss": 0.3795, "step": 7612, "task_loss": 0.5733726620674133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4003748595714569, "epoch": 6.44, "learning_rate": 4.033329308054583e-06, "loss": 0.4301, "step": 7613, "task_loss": 0.23237332701683044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4649609923362732, "epoch": 6.44, "learning_rate": 4.0272913899287525e-06, "loss": 0.4972, "step": 7614, "task_loss": 1.7550774812698364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6407753229141235, "epoch": 6.44, "learning_rate": 4.021253471802922e-06, "loss": 0.58, "step": 7615, "task_loss": 0.4978964030742645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30274927616119385, "epoch": 6.44, "learning_rate": 4.015215553677092e-06, "loss": 0.5138, "step": 7616, "task_loss": 1.0885311365127563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6557941436767578, "epoch": 6.44, "learning_rate": 4.0091776355512625e-06, "loss": 0.4202, "step": 7617, "task_loss": 0.3223996162414551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5106455087661743, "epoch": 6.44, "learning_rate": 4.0031397174254314e-06, "loss": 0.4253, "step": 7618, "task_loss": 0.7875860333442688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39976930618286133, "epoch": 6.44, "learning_rate": 3.997101799299602e-06, "loss": 0.4628, "step": 7619, "task_loss": 1.2734344005584717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3517210781574249, "epoch": 6.44, "learning_rate": 3.991063881173771e-06, "loss": 0.3506, "step": 7620, "task_loss": 0.6845234632492065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2996918559074402, "epoch": 6.44, "learning_rate": 3.9850259630479414e-06, "loss": 0.421, "step": 7621, "task_loss": 0.09655660390853882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4344547688961029, "epoch": 6.44, "learning_rate": 3.978988044922111e-06, "loss": 0.4774, "step": 7622, "task_loss": 0.5702031850814819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44666552543640137, "epoch": 6.44, "learning_rate": 3.972950126796281e-06, "loss": 0.4365, "step": 7623, "task_loss": 0.7123099565505981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5679116249084473, "epoch": 6.44, "learning_rate": 3.966912208670451e-06, "loss": 0.3733, "step": 7624, "task_loss": 0.661853015422821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34131571650505066, "epoch": 6.45, "learning_rate": 3.96087429054462e-06, "loss": 0.4266, "step": 7625, "task_loss": 0.3214270770549774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4605754613876343, "epoch": 6.45, "learning_rate": 3.95483637241879e-06, "loss": 0.4444, "step": 7626, "task_loss": 0.5668392181396484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5282625555992126, "epoch": 6.45, "learning_rate": 3.948798454292961e-06, "loss": 0.5315, "step": 7627, "task_loss": 0.7395278215408325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3747004270553589, "epoch": 6.45, "learning_rate": 3.9427605361671295e-06, "loss": 0.4781, "step": 7628, "task_loss": 0.5778380036354065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27020007371902466, "epoch": 6.45, "learning_rate": 3.9367226180413e-06, "loss": 0.3376, "step": 7629, "task_loss": 0.521760106086731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5351536870002747, "epoch": 6.45, "learning_rate": 3.930684699915469e-06, "loss": 0.5025, "step": 7630, "task_loss": 0.667941689491272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4667362570762634, "epoch": 6.45, "learning_rate": 3.9246467817896395e-06, "loss": 0.5077, "step": 7631, "task_loss": 0.061607230454683304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4018675684928894, "epoch": 6.45, "learning_rate": 3.918608863663809e-06, "loss": 0.4813, "step": 7632, "task_loss": 1.7519304752349854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5138986110687256, "epoch": 6.45, "learning_rate": 3.912570945537979e-06, "loss": 0.4764, "step": 7633, "task_loss": 0.4051055610179901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2937704920768738, "epoch": 6.45, "learning_rate": 3.906533027412149e-06, "loss": 0.3064, "step": 7634, "task_loss": 0.8094771504402161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.385277658700943, "epoch": 6.45, "learning_rate": 3.9004951092863184e-06, "loss": 0.4183, "step": 7635, "task_loss": 0.8220266103744507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3171766400337219, "epoch": 6.45, "learning_rate": 3.894457191160488e-06, "loss": 0.3405, "step": 7636, "task_loss": 0.21889159083366394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.17481842637062073, "epoch": 6.46, "learning_rate": 3.888419273034658e-06, "loss": 0.5309, "step": 7637, "task_loss": 0.6263213157653809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3915245831012726, "epoch": 6.46, "learning_rate": 3.882381354908828e-06, "loss": 0.418, "step": 7638, "task_loss": 1.416357398033142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5309879183769226, "epoch": 6.46, "learning_rate": 3.876343436782997e-06, "loss": 0.7501, "step": 7639, "task_loss": 0.47290876507759094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27492040395736694, "epoch": 6.46, "learning_rate": 3.870305518657167e-06, "loss": 0.3447, "step": 7640, "task_loss": 0.3564997911453247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23887483775615692, "epoch": 6.46, "learning_rate": 3.864267600531337e-06, "loss": 0.3217, "step": 7641, "task_loss": 0.46908557415008545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47848498821258545, "epoch": 6.46, "learning_rate": 3.858229682405507e-06, "loss": 0.438, "step": 7642, "task_loss": 0.3249066472053528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.651793360710144, "epoch": 6.46, "learning_rate": 3.852191764279676e-06, "loss": 0.4527, "step": 7643, "task_loss": 1.4709583520889282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38558194041252136, "epoch": 6.46, "learning_rate": 3.846153846153847e-06, "loss": 0.3581, "step": 7644, "task_loss": 0.15202544629573822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2086794078350067, "epoch": 6.46, "learning_rate": 3.840115928028016e-06, "loss": 0.3344, "step": 7645, "task_loss": 0.46285197138786316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34382709860801697, "epoch": 6.46, "learning_rate": 3.834078009902186e-06, "loss": 0.4527, "step": 7646, "task_loss": 0.3009686768054962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39628106355667114, "epoch": 6.46, "learning_rate": 3.828040091776355e-06, "loss": 0.2783, "step": 7647, "task_loss": 0.2661716639995575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42240530252456665, "epoch": 6.46, "learning_rate": 3.822002173650526e-06, "loss": 0.3717, "step": 7648, "task_loss": 1.3972136974334717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2487010955810547, "epoch": 6.47, "learning_rate": 3.815964255524695e-06, "loss": 0.2774, "step": 7649, "task_loss": 0.24234063923358917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5285531878471375, "epoch": 6.47, "learning_rate": 3.8099263373988647e-06, "loss": 0.4504, "step": 7650, "task_loss": 0.22108548879623413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25214970111846924, "epoch": 6.47, "learning_rate": 3.803888419273035e-06, "loss": 0.4017, "step": 7651, "task_loss": 0.8750590085983276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34087246656417847, "epoch": 6.47, "learning_rate": 3.797850501147204e-06, "loss": 0.4213, "step": 7652, "task_loss": 0.6880748867988586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4011131823062897, "epoch": 6.47, "learning_rate": 3.7918125830213743e-06, "loss": 0.4774, "step": 7653, "task_loss": 0.24854843318462372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3821079730987549, "epoch": 6.47, "learning_rate": 3.7857746648955445e-06, "loss": 0.4651, "step": 7654, "task_loss": 0.44583743810653687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27302879095077515, "epoch": 6.47, "learning_rate": 3.7797367467697138e-06, "loss": 0.294, "step": 7655, "task_loss": 0.29399457573890686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5970277786254883, "epoch": 6.47, "learning_rate": 3.773698828643884e-06, "loss": 0.3895, "step": 7656, "task_loss": 0.6561475396156311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34838950634002686, "epoch": 6.47, "learning_rate": 3.767660910518053e-06, "loss": 0.44, "step": 7657, "task_loss": 0.1460404098033905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4720696210861206, "epoch": 6.47, "learning_rate": 3.7616229923922234e-06, "loss": 0.455, "step": 7658, "task_loss": 1.0234571695327759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40535807609558105, "epoch": 6.47, "learning_rate": 3.7555850742663935e-06, "loss": 0.3907, "step": 7659, "task_loss": 0.1924181878566742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46643105149269104, "epoch": 6.47, "learning_rate": 3.749547156140563e-06, "loss": 0.3699, "step": 7660, "task_loss": 0.3735228180885315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21229948103427887, "epoch": 6.48, "learning_rate": 3.743509238014733e-06, "loss": 0.358, "step": 7661, "task_loss": 0.3932367265224457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2838762402534485, "epoch": 6.48, "learning_rate": 3.7374713198889023e-06, "loss": 0.4374, "step": 7662, "task_loss": 0.6160967350006104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4643041491508484, "epoch": 6.48, "learning_rate": 3.7314334017630724e-06, "loss": 0.5235, "step": 7663, "task_loss": 0.37698498368263245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3250167965888977, "epoch": 6.48, "learning_rate": 3.725395483637242e-06, "loss": 0.5315, "step": 7664, "task_loss": 0.5543719530105591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4892522990703583, "epoch": 6.48, "learning_rate": 3.719357565511412e-06, "loss": 0.4295, "step": 7665, "task_loss": 1.6850335597991943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5706751346588135, "epoch": 6.48, "learning_rate": 3.7133196473855816e-06, "loss": 0.5473, "step": 7666, "task_loss": 0.9194045066833496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28616926074028015, "epoch": 6.48, "learning_rate": 3.7072817292597513e-06, "loss": 0.4069, "step": 7667, "task_loss": 0.625519871711731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39594894647598267, "epoch": 6.48, "learning_rate": 3.701243811133921e-06, "loss": 0.4118, "step": 7668, "task_loss": 0.5988566279411316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26500722765922546, "epoch": 6.48, "learning_rate": 3.695205893008091e-06, "loss": 0.3213, "step": 7669, "task_loss": 0.35659363865852356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3120731711387634, "epoch": 6.48, "learning_rate": 3.6891679748822605e-06, "loss": 0.3607, "step": 7670, "task_loss": 1.3844609260559082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35540738701820374, "epoch": 6.48, "learning_rate": 3.6831300567564306e-06, "loss": 0.4473, "step": 7671, "task_loss": 0.831452488899231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44631096720695496, "epoch": 6.48, "learning_rate": 3.6770921386306e-06, "loss": 0.4504, "step": 7672, "task_loss": 0.3051972985267639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24287714064121246, "epoch": 6.49, "learning_rate": 3.67105422050477e-06, "loss": 0.3644, "step": 7673, "task_loss": 0.20481258630752563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3949527144432068, "epoch": 6.49, "learning_rate": 3.66501630237894e-06, "loss": 0.3633, "step": 7674, "task_loss": 0.4955171048641205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4386295676231384, "epoch": 6.49, "learning_rate": 3.6589783842531095e-06, "loss": 0.3712, "step": 7675, "task_loss": 0.21991212666034698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3999235928058624, "epoch": 6.49, "learning_rate": 3.6529404661272797e-06, "loss": 0.3757, "step": 7676, "task_loss": 0.41477474570274353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3232361674308777, "epoch": 6.49, "learning_rate": 3.646902548001449e-06, "loss": 0.4137, "step": 7677, "task_loss": 0.0927613154053688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35872411727905273, "epoch": 6.49, "learning_rate": 3.640864629875619e-06, "loss": 0.4782, "step": 7678, "task_loss": 0.6009278893470764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3037814497947693, "epoch": 6.49, "learning_rate": 3.6348267117497893e-06, "loss": 0.4365, "step": 7679, "task_loss": 0.42444831132888794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3644055128097534, "epoch": 6.49, "learning_rate": 3.6287887936239586e-06, "loss": 0.5637, "step": 7680, "task_loss": 0.8057777285575867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22185848653316498, "epoch": 6.49, "learning_rate": 3.6227508754981287e-06, "loss": 0.307, "step": 7681, "task_loss": 0.08156996965408325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48345449566841125, "epoch": 6.49, "learning_rate": 3.616712957372298e-06, "loss": 0.4428, "step": 7682, "task_loss": 0.08626305311918259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2031942903995514, "epoch": 6.49, "learning_rate": 3.610675039246468e-06, "loss": 0.4034, "step": 7683, "task_loss": 0.012371708638966084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2889648675918579, "epoch": 6.5, "learning_rate": 3.6046371211206383e-06, "loss": 0.4895, "step": 7684, "task_loss": 0.6243511438369751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30407506227493286, "epoch": 6.5, "learning_rate": 3.5985992029948076e-06, "loss": 0.4189, "step": 7685, "task_loss": 0.8658360838890076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23634982109069824, "epoch": 6.5, "learning_rate": 3.5925612848689777e-06, "loss": 0.3804, "step": 7686, "task_loss": 0.33452314138412476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3774471580982208, "epoch": 6.5, "learning_rate": 3.586523366743147e-06, "loss": 0.3363, "step": 7687, "task_loss": 0.8340803384780884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4032588303089142, "epoch": 6.5, "learning_rate": 3.580485448617317e-06, "loss": 0.3953, "step": 7688, "task_loss": 0.8952805995941162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2760763168334961, "epoch": 6.5, "learning_rate": 3.5744475304914865e-06, "loss": 0.387, "step": 7689, "task_loss": 0.5491968393325806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4760347306728363, "epoch": 6.5, "learning_rate": 3.5684096123656566e-06, "loss": 0.4099, "step": 7690, "task_loss": 1.4676611423492432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4849081337451935, "epoch": 6.5, "learning_rate": 3.5623716942398264e-06, "loss": 0.4279, "step": 7691, "task_loss": 1.237672209739685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.18625405430793762, "epoch": 6.5, "learning_rate": 3.5563337761139957e-06, "loss": 0.4516, "step": 7692, "task_loss": 0.06318674981594086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5162789821624756, "epoch": 6.5, "learning_rate": 3.550295857988166e-06, "loss": 0.4514, "step": 7693, "task_loss": 0.5883114337921143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23461666703224182, "epoch": 6.5, "learning_rate": 3.544257939862335e-06, "loss": 0.2843, "step": 7694, "task_loss": 0.3116132318973541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29992425441741943, "epoch": 6.5, "learning_rate": 3.5382200217365053e-06, "loss": 0.3473, "step": 7695, "task_loss": 0.31048455834388733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5551434755325317, "epoch": 6.51, "learning_rate": 3.5321821036106754e-06, "loss": 0.443, "step": 7696, "task_loss": 1.0268428325653076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4671921730041504, "epoch": 6.51, "learning_rate": 3.5261441854848447e-06, "loss": 0.4429, "step": 7697, "task_loss": 1.5239801406860352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7202664017677307, "epoch": 6.51, "learning_rate": 3.520106267359015e-06, "loss": 0.4115, "step": 7698, "task_loss": 0.8783277869224548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2792589068412781, "epoch": 6.51, "learning_rate": 3.514068349233184e-06, "loss": 0.4367, "step": 7699, "task_loss": 0.2987062633037567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4970380663871765, "epoch": 6.51, "learning_rate": 3.5080304311073543e-06, "loss": 0.3929, "step": 7700, "task_loss": 0.6271637678146362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5262926816940308, "epoch": 6.51, "learning_rate": 3.5019925129815245e-06, "loss": 0.5123, "step": 7701, "task_loss": 0.20182503759860992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24873396754264832, "epoch": 6.51, "learning_rate": 3.4959545948556938e-06, "loss": 0.3899, "step": 7702, "task_loss": 0.3961564600467682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39284196496009827, "epoch": 6.51, "learning_rate": 3.489916676729864e-06, "loss": 0.3842, "step": 7703, "task_loss": 0.5264989733695984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2523111402988434, "epoch": 6.51, "learning_rate": 3.483878758604033e-06, "loss": 0.427, "step": 7704, "task_loss": 0.5021206736564636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6154608726501465, "epoch": 6.51, "learning_rate": 3.4778408404782034e-06, "loss": 0.4715, "step": 7705, "task_loss": 0.3391534984111786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6082305908203125, "epoch": 6.51, "learning_rate": 3.4718029223523735e-06, "loss": 0.3934, "step": 7706, "task_loss": 0.3663659989833832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32460853457450867, "epoch": 6.51, "learning_rate": 3.465765004226543e-06, "loss": 0.3508, "step": 7707, "task_loss": 0.7027719616889954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5089862942695618, "epoch": 6.52, "learning_rate": 3.459727086100713e-06, "loss": 0.4111, "step": 7708, "task_loss": 1.740857481956482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2740253508090973, "epoch": 6.52, "learning_rate": 3.4536891679748822e-06, "loss": 0.4096, "step": 7709, "task_loss": 0.4862251877784729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8225532174110413, "epoch": 6.52, "learning_rate": 3.4476512498490524e-06, "loss": 0.5542, "step": 7710, "task_loss": 0.852367103099823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4984482526779175, "epoch": 6.52, "learning_rate": 3.441613331723222e-06, "loss": 0.5627, "step": 7711, "task_loss": 0.2050698697566986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25185441970825195, "epoch": 6.52, "learning_rate": 3.435575413597392e-06, "loss": 0.3759, "step": 7712, "task_loss": 0.8190176486968994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2851306200027466, "epoch": 6.52, "learning_rate": 3.4295374954715616e-06, "loss": 0.4244, "step": 7713, "task_loss": 0.3571352958679199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3931918740272522, "epoch": 6.52, "learning_rate": 3.4234995773457313e-06, "loss": 0.4807, "step": 7714, "task_loss": 0.5169183015823364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3308052122592926, "epoch": 6.52, "learning_rate": 3.417461659219901e-06, "loss": 0.3833, "step": 7715, "task_loss": 0.9095100164413452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2579752802848816, "epoch": 6.52, "learning_rate": 3.411423741094071e-06, "loss": 0.4206, "step": 7716, "task_loss": 0.39869749546051025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24683046340942383, "epoch": 6.52, "learning_rate": 3.4053858229682405e-06, "loss": 0.3573, "step": 7717, "task_loss": 0.29481446743011475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5046948194503784, "epoch": 6.52, "learning_rate": 3.3993479048424106e-06, "loss": 0.5402, "step": 7718, "task_loss": 0.5728384852409363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2638630270957947, "epoch": 6.52, "learning_rate": 3.39330998671658e-06, "loss": 0.4007, "step": 7719, "task_loss": 0.6914132833480835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3226665258407593, "epoch": 6.53, "learning_rate": 3.38727206859075e-06, "loss": 0.431, "step": 7720, "task_loss": 0.8165730834007263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6342872381210327, "epoch": 6.53, "learning_rate": 3.38123415046492e-06, "loss": 0.4441, "step": 7721, "task_loss": 0.3512526750564575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5903358459472656, "epoch": 6.53, "learning_rate": 3.3751962323390895e-06, "loss": 0.5681, "step": 7722, "task_loss": 0.7592611908912659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2888670861721039, "epoch": 6.53, "learning_rate": 3.3691583142132597e-06, "loss": 0.4407, "step": 7723, "task_loss": 0.12918084859848022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2667515277862549, "epoch": 6.53, "learning_rate": 3.363120396087429e-06, "loss": 0.4991, "step": 7724, "task_loss": 0.2222709357738495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31435298919677734, "epoch": 6.53, "learning_rate": 3.357082477961599e-06, "loss": 0.4506, "step": 7725, "task_loss": 0.9514991641044617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25086653232574463, "epoch": 6.53, "learning_rate": 3.3510445598357693e-06, "loss": 0.4223, "step": 7726, "task_loss": 0.5273663997650146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3931138515472412, "epoch": 6.53, "learning_rate": 3.3450066417099386e-06, "loss": 0.3153, "step": 7727, "task_loss": 0.20566728711128235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31249791383743286, "epoch": 6.53, "learning_rate": 3.3389687235841087e-06, "loss": 0.3433, "step": 7728, "task_loss": 0.02320913039147854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4897136688232422, "epoch": 6.53, "learning_rate": 3.332930805458278e-06, "loss": 0.3873, "step": 7729, "task_loss": 0.4566836953163147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2944187819957733, "epoch": 6.53, "learning_rate": 3.326892887332448e-06, "loss": 0.3735, "step": 7730, "task_loss": 0.5994160175323486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29624760150909424, "epoch": 6.53, "learning_rate": 3.3208549692066174e-06, "loss": 0.3797, "step": 7731, "task_loss": 0.6483547687530518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28410589694976807, "epoch": 6.54, "learning_rate": 3.3148170510807876e-06, "loss": 0.3718, "step": 7732, "task_loss": 0.9737540483474731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4408337473869324, "epoch": 6.54, "learning_rate": 3.3087791329549573e-06, "loss": 0.3817, "step": 7733, "task_loss": 0.7905225157737732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5130460858345032, "epoch": 6.54, "learning_rate": 3.302741214829127e-06, "loss": 0.4205, "step": 7734, "task_loss": 0.6533775329589844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27270805835723877, "epoch": 6.54, "learning_rate": 3.2967032967032968e-06, "loss": 0.4238, "step": 7735, "task_loss": 0.3389483690261841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5491988062858582, "epoch": 6.54, "learning_rate": 3.2906653785774665e-06, "loss": 0.3859, "step": 7736, "task_loss": 1.2513906955718994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8034136295318604, "epoch": 6.54, "learning_rate": 3.2846274604516362e-06, "loss": 0.5724, "step": 7737, "task_loss": 1.2257740497589111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3546361029148102, "epoch": 6.54, "learning_rate": 3.2785895423258064e-06, "loss": 0.3237, "step": 7738, "task_loss": 0.7468657493591309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3124690353870392, "epoch": 6.54, "learning_rate": 3.2725516241999757e-06, "loss": 0.3118, "step": 7739, "task_loss": 0.31462687253952026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2261171042919159, "epoch": 6.54, "learning_rate": 3.266513706074146e-06, "loss": 0.3055, "step": 7740, "task_loss": 0.470099538564682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2768022418022156, "epoch": 6.54, "learning_rate": 3.260475787948315e-06, "loss": 0.3323, "step": 7741, "task_loss": 0.6580920219421387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23272868990898132, "epoch": 6.54, "learning_rate": 3.2544378698224853e-06, "loss": 0.298, "step": 7742, "task_loss": 0.17747752368450165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3885907530784607, "epoch": 6.54, "learning_rate": 3.2483999516966554e-06, "loss": 0.3377, "step": 7743, "task_loss": 0.3679782450199127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5739710330963135, "epoch": 6.55, "learning_rate": 3.2423620335708247e-06, "loss": 0.5162, "step": 7744, "task_loss": 0.607576310634613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2692696154117584, "epoch": 6.55, "learning_rate": 3.236324115444995e-06, "loss": 0.3574, "step": 7745, "task_loss": 0.5703031420707703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2844216227531433, "epoch": 6.55, "learning_rate": 3.230286197319164e-06, "loss": 0.4341, "step": 7746, "task_loss": 0.146365687251091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.678672194480896, "epoch": 6.55, "learning_rate": 3.2242482791933343e-06, "loss": 0.5235, "step": 7747, "task_loss": 1.024153709411621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29055529832839966, "epoch": 6.55, "learning_rate": 3.2182103610675045e-06, "loss": 0.3627, "step": 7748, "task_loss": 0.08211304247379303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4737032949924469, "epoch": 6.55, "learning_rate": 3.2121724429416738e-06, "loss": 0.584, "step": 7749, "task_loss": 0.9777902960777283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27337443828582764, "epoch": 6.55, "learning_rate": 3.206134524815844e-06, "loss": 0.3719, "step": 7750, "task_loss": 0.5225952863693237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24841195344924927, "epoch": 6.55, "learning_rate": 3.200096606690013e-06, "loss": 0.4198, "step": 7751, "task_loss": 0.3015982210636139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3621988892555237, "epoch": 6.55, "learning_rate": 3.1940586885641833e-06, "loss": 0.5122, "step": 7752, "task_loss": 1.0003283023834229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4399791359901428, "epoch": 6.55, "learning_rate": 3.1880207704383535e-06, "loss": 0.4226, "step": 7753, "task_loss": 0.5162055492401123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5156853795051575, "epoch": 6.55, "learning_rate": 3.181982852312523e-06, "loss": 0.518, "step": 7754, "task_loss": 1.6041244268417358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7884372472763062, "epoch": 6.56, "learning_rate": 3.1759449341866925e-06, "loss": 0.4916, "step": 7755, "task_loss": 1.0826177597045898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4020611047744751, "epoch": 6.56, "learning_rate": 3.1699070160608622e-06, "loss": 0.356, "step": 7756, "task_loss": 0.4561913311481476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3271491229534149, "epoch": 6.56, "learning_rate": 3.163869097935032e-06, "loss": 0.4042, "step": 7757, "task_loss": 0.5117846727371216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47055765986442566, "epoch": 6.56, "learning_rate": 3.157831179809202e-06, "loss": 0.3912, "step": 7758, "task_loss": 0.9024670124053955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21036764979362488, "epoch": 6.56, "learning_rate": 3.1517932616833714e-06, "loss": 0.3513, "step": 7759, "task_loss": 0.45168206095695496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1887086033821106, "epoch": 6.56, "learning_rate": 3.1457553435575416e-06, "loss": 0.4591, "step": 7760, "task_loss": 0.3421610891819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39320677518844604, "epoch": 6.56, "learning_rate": 3.139717425431711e-06, "loss": 0.3975, "step": 7761, "task_loss": 0.8622456789016724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3508971929550171, "epoch": 6.56, "learning_rate": 3.133679507305881e-06, "loss": 0.3903, "step": 7762, "task_loss": 0.5515611171722412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4156569540500641, "epoch": 6.56, "learning_rate": 3.127641589180051e-06, "loss": 0.4597, "step": 7763, "task_loss": 0.5957874655723572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.528780460357666, "epoch": 6.56, "learning_rate": 3.1216036710542205e-06, "loss": 0.4121, "step": 7764, "task_loss": 0.8322903513908386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27483171224594116, "epoch": 6.56, "learning_rate": 3.11556575292839e-06, "loss": 0.4193, "step": 7765, "task_loss": 0.7760427594184875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43639183044433594, "epoch": 6.56, "learning_rate": 3.1095278348025603e-06, "loss": 0.4458, "step": 7766, "task_loss": 0.25227904319763184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27535173296928406, "epoch": 6.57, "learning_rate": 3.10348991667673e-06, "loss": 0.3113, "step": 7767, "task_loss": 0.27668142318725586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1979883462190628, "epoch": 6.57, "learning_rate": 3.0974519985508998e-06, "loss": 0.4337, "step": 7768, "task_loss": 0.5311219692230225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.11772812157869339, "epoch": 6.57, "learning_rate": 3.0914140804250695e-06, "loss": 0.2962, "step": 7769, "task_loss": 0.006232676561921835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42099103331565857, "epoch": 6.57, "learning_rate": 3.0853761622992392e-06, "loss": 0.4041, "step": 7770, "task_loss": 0.31587886810302734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4458006024360657, "epoch": 6.57, "learning_rate": 3.0793382441734094e-06, "loss": 0.408, "step": 7771, "task_loss": 0.9738448262214661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2247922420501709, "epoch": 6.57, "learning_rate": 3.073300326047579e-06, "loss": 0.336, "step": 7772, "task_loss": 0.34269025921821594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7674140334129333, "epoch": 6.57, "learning_rate": 3.067262407921749e-06, "loss": 0.6021, "step": 7773, "task_loss": 1.3403419256210327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4358260929584503, "epoch": 6.57, "learning_rate": 3.0612244897959185e-06, "loss": 0.5695, "step": 7774, "task_loss": 0.6478825807571411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.515540599822998, "epoch": 6.57, "learning_rate": 3.0551865716700883e-06, "loss": 0.3508, "step": 7775, "task_loss": 0.5011901259422302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4169060289859772, "epoch": 6.57, "learning_rate": 3.0491486535442584e-06, "loss": 0.4068, "step": 7776, "task_loss": 1.208677053451538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42060232162475586, "epoch": 6.57, "learning_rate": 3.043110735418428e-06, "loss": 0.4037, "step": 7777, "task_loss": 0.623650848865509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41231703758239746, "epoch": 6.57, "learning_rate": 3.037072817292598e-06, "loss": 0.3776, "step": 7778, "task_loss": 0.9123532772064209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42431432008743286, "epoch": 6.58, "learning_rate": 3.0310348991667676e-06, "loss": 0.4698, "step": 7779, "task_loss": 0.7035282850265503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5214707851409912, "epoch": 6.58, "learning_rate": 3.0249969810409373e-06, "loss": 0.4172, "step": 7780, "task_loss": 0.715429961681366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3027152717113495, "epoch": 6.58, "learning_rate": 3.018959062915107e-06, "loss": 0.4268, "step": 7781, "task_loss": 0.8404766321182251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38902848958969116, "epoch": 6.58, "learning_rate": 3.0129211447892768e-06, "loss": 0.3811, "step": 7782, "task_loss": 0.07236456125974655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5380887389183044, "epoch": 6.58, "learning_rate": 3.0068832266634465e-06, "loss": 0.418, "step": 7783, "task_loss": 1.2045073509216309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40223369002342224, "epoch": 6.58, "learning_rate": 3.0008453085376162e-06, "loss": 0.524, "step": 7784, "task_loss": 0.7834155559539795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4357958137989044, "epoch": 6.58, "learning_rate": 2.994807390411786e-06, "loss": 0.3435, "step": 7785, "task_loss": 0.3571382164955139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3943432569503784, "epoch": 6.58, "learning_rate": 2.9887694722859557e-06, "loss": 0.4797, "step": 7786, "task_loss": 1.0196900367736816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6913484930992126, "epoch": 6.58, "learning_rate": 2.982731554160126e-06, "loss": 0.4286, "step": 7787, "task_loss": 1.1754988431930542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3823729455471039, "epoch": 6.58, "learning_rate": 2.9766936360342955e-06, "loss": 0.3534, "step": 7788, "task_loss": 0.5555999875068665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4940057694911957, "epoch": 6.58, "learning_rate": 2.9706557179084653e-06, "loss": 0.3469, "step": 7789, "task_loss": 0.5420287251472473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24496492743492126, "epoch": 6.58, "learning_rate": 2.964617799782635e-06, "loss": 0.3917, "step": 7790, "task_loss": 0.150486022233963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.496233195066452, "epoch": 6.59, "learning_rate": 2.9585798816568047e-06, "loss": 0.3905, "step": 7791, "task_loss": 0.7251565456390381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5589656233787537, "epoch": 6.59, "learning_rate": 2.952541963530975e-06, "loss": 0.4376, "step": 7792, "task_loss": 1.370635986328125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.433832585811615, "epoch": 6.59, "learning_rate": 2.9465040454051446e-06, "loss": 0.4523, "step": 7793, "task_loss": 0.8142435550689697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4779830574989319, "epoch": 6.59, "learning_rate": 2.9404661272793143e-06, "loss": 0.5005, "step": 7794, "task_loss": 0.6257989406585693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34620389342308044, "epoch": 6.59, "learning_rate": 2.934428209153484e-06, "loss": 0.4298, "step": 7795, "task_loss": 0.3199808597564697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27027735114097595, "epoch": 6.59, "learning_rate": 2.9283902910276537e-06, "loss": 0.3958, "step": 7796, "task_loss": 0.6207057237625122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4141535758972168, "epoch": 6.59, "learning_rate": 2.922352372901824e-06, "loss": 0.3452, "step": 7797, "task_loss": 0.3256186246871948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33471545577049255, "epoch": 6.59, "learning_rate": 2.9163144547759936e-06, "loss": 0.379, "step": 7798, "task_loss": 0.5102890133857727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33844488859176636, "epoch": 6.59, "learning_rate": 2.9102765366501633e-06, "loss": 0.4639, "step": 7799, "task_loss": 0.4191419184207916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.15159349143505096, "epoch": 6.59, "learning_rate": 2.904238618524333e-06, "loss": 0.4018, "step": 7800, "task_loss": 0.04137583449482918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.466411828994751, "epoch": 6.59, "learning_rate": 2.898200700398503e-06, "loss": 0.5089, "step": 7801, "task_loss": 0.43680641055107117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37998342514038086, "epoch": 6.59, "learning_rate": 2.8921627822726725e-06, "loss": 0.4395, "step": 7802, "task_loss": 0.6249713897705078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5105043649673462, "epoch": 6.6, "learning_rate": 2.8861248641468422e-06, "loss": 0.4452, "step": 7803, "task_loss": 0.8355153799057007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3620811104774475, "epoch": 6.6, "learning_rate": 2.880086946021012e-06, "loss": 0.4232, "step": 7804, "task_loss": 0.5232028365135193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4444090723991394, "epoch": 6.6, "learning_rate": 2.8740490278951817e-06, "loss": 0.4479, "step": 7805, "task_loss": 0.5205304026603699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5998578667640686, "epoch": 6.6, "learning_rate": 2.8680111097693514e-06, "loss": 0.5906, "step": 7806, "task_loss": 1.0072587728500366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6915017366409302, "epoch": 6.6, "learning_rate": 2.8619731916435216e-06, "loss": 0.4136, "step": 7807, "task_loss": 1.1984752416610718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4589049220085144, "epoch": 6.6, "learning_rate": 2.8559352735176913e-06, "loss": 0.501, "step": 7808, "task_loss": 0.5365243554115295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3888126313686371, "epoch": 6.6, "learning_rate": 2.849897355391861e-06, "loss": 0.3624, "step": 7809, "task_loss": 1.0376373529434204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3642645478248596, "epoch": 6.6, "learning_rate": 2.8438594372660307e-06, "loss": 0.4473, "step": 7810, "task_loss": 0.17252805829048157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3289703130722046, "epoch": 6.6, "learning_rate": 2.8378215191402005e-06, "loss": 0.3849, "step": 7811, "task_loss": 0.9243664741516113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2548018991947174, "epoch": 6.6, "learning_rate": 2.83178360101437e-06, "loss": 0.404, "step": 7812, "task_loss": 0.058791011571884155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3100758194923401, "epoch": 6.6, "learning_rate": 2.8257456828885403e-06, "loss": 0.3332, "step": 7813, "task_loss": 0.20684626698493958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37839335203170776, "epoch": 6.6, "learning_rate": 2.81970776476271e-06, "loss": 0.3993, "step": 7814, "task_loss": 0.29184335470199585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4549984037876129, "epoch": 6.61, "learning_rate": 2.8136698466368798e-06, "loss": 0.4734, "step": 7815, "task_loss": 0.6964058876037598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.20752930641174316, "epoch": 6.61, "learning_rate": 2.8076319285110495e-06, "loss": 0.2656, "step": 7816, "task_loss": 0.43779420852661133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38366836309432983, "epoch": 6.61, "learning_rate": 2.8015940103852192e-06, "loss": 0.4193, "step": 7817, "task_loss": 0.9183304309844971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5784552693367004, "epoch": 6.61, "learning_rate": 2.7955560922593894e-06, "loss": 0.4785, "step": 7818, "task_loss": 0.6070873737335205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5086684226989746, "epoch": 6.61, "learning_rate": 2.789518174133559e-06, "loss": 0.402, "step": 7819, "task_loss": 0.9996541738510132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48842471837997437, "epoch": 6.61, "learning_rate": 2.783480256007729e-06, "loss": 0.407, "step": 7820, "task_loss": 1.2925785779953003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4267001748085022, "epoch": 6.61, "learning_rate": 2.7774423378818985e-06, "loss": 0.3581, "step": 7821, "task_loss": 1.0676548480987549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4967702329158783, "epoch": 6.61, "learning_rate": 2.7714044197560683e-06, "loss": 0.468, "step": 7822, "task_loss": 0.5883840322494507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4273930788040161, "epoch": 6.61, "learning_rate": 2.765366501630238e-06, "loss": 0.5187, "step": 7823, "task_loss": 0.7461974620819092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3583569824695587, "epoch": 6.61, "learning_rate": 2.7593285835044077e-06, "loss": 0.4721, "step": 7824, "task_loss": 0.7887017726898193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2187284231185913, "epoch": 6.61, "learning_rate": 2.7532906653785774e-06, "loss": 0.2664, "step": 7825, "task_loss": 0.10962007939815521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.310714453458786, "epoch": 6.61, "learning_rate": 2.747252747252747e-06, "loss": 0.3914, "step": 7826, "task_loss": 0.8559110164642334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4884030520915985, "epoch": 6.62, "learning_rate": 2.741214829126917e-06, "loss": 0.5416, "step": 7827, "task_loss": 1.5918447971343994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.431973934173584, "epoch": 6.62, "learning_rate": 2.735176911001087e-06, "loss": 0.5168, "step": 7828, "task_loss": 1.1161606311798096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2481343150138855, "epoch": 6.62, "learning_rate": 2.7291389928752568e-06, "loss": 0.3986, "step": 7829, "task_loss": 0.8261445760726929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5061767101287842, "epoch": 6.62, "learning_rate": 2.7231010747494265e-06, "loss": 0.6034, "step": 7830, "task_loss": 0.3796250522136688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4761250615119934, "epoch": 6.62, "learning_rate": 2.717063156623596e-06, "loss": 0.4086, "step": 7831, "task_loss": 0.6396211385726929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4305231273174286, "epoch": 6.62, "learning_rate": 2.711025238497766e-06, "loss": 0.3261, "step": 7832, "task_loss": 0.29886797070503235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.206290140748024, "epoch": 6.62, "learning_rate": 2.7049873203719357e-06, "loss": 0.3853, "step": 7833, "task_loss": 0.7150135636329651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4795604348182678, "epoch": 6.62, "learning_rate": 2.698949402246106e-06, "loss": 0.4541, "step": 7834, "task_loss": 0.9646514058113098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3181604743003845, "epoch": 6.62, "learning_rate": 2.6929114841202755e-06, "loss": 0.3892, "step": 7835, "task_loss": 0.6452935934066772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.296188086271286, "epoch": 6.62, "learning_rate": 2.6868735659944453e-06, "loss": 0.5251, "step": 7836, "task_loss": 1.1955130100250244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23387813568115234, "epoch": 6.62, "learning_rate": 2.680835647868615e-06, "loss": 0.3997, "step": 7837, "task_loss": 0.237611785531044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4236043691635132, "epoch": 6.63, "learning_rate": 2.6747977297427847e-06, "loss": 0.3912, "step": 7838, "task_loss": 1.132907509803772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35488858819007874, "epoch": 6.63, "learning_rate": 2.668759811616955e-06, "loss": 0.4736, "step": 7839, "task_loss": 1.3407732248306274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3824750781059265, "epoch": 6.63, "learning_rate": 2.6627218934911246e-06, "loss": 0.4276, "step": 7840, "task_loss": 1.432985782623291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31375280022621155, "epoch": 6.63, "learning_rate": 2.6566839753652943e-06, "loss": 0.4523, "step": 7841, "task_loss": 0.029650317505002022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37818723917007446, "epoch": 6.63, "learning_rate": 2.650646057239464e-06, "loss": 0.5582, "step": 7842, "task_loss": 0.3610091805458069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39065325260162354, "epoch": 6.63, "learning_rate": 2.6446081391136337e-06, "loss": 0.4625, "step": 7843, "task_loss": 0.6606209874153137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3053005039691925, "epoch": 6.63, "learning_rate": 2.638570220987804e-06, "loss": 0.2674, "step": 7844, "task_loss": 0.44320106506347656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3128683865070343, "epoch": 6.63, "learning_rate": 2.632532302861973e-06, "loss": 0.4007, "step": 7845, "task_loss": 0.9063977003097534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5638011693954468, "epoch": 6.63, "learning_rate": 2.626494384736143e-06, "loss": 0.5071, "step": 7846, "task_loss": 1.4544475078582764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40162816643714905, "epoch": 6.63, "learning_rate": 2.6204564666103126e-06, "loss": 0.4243, "step": 7847, "task_loss": 0.8912254571914673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22892434895038605, "epoch": 6.63, "learning_rate": 2.6144185484844824e-06, "loss": 0.277, "step": 7848, "task_loss": 0.45351681113243103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1996748000383377, "epoch": 6.63, "learning_rate": 2.6083806303586525e-06, "loss": 0.4053, "step": 7849, "task_loss": 0.5147760510444641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46449971199035645, "epoch": 6.64, "learning_rate": 2.6023427122328222e-06, "loss": 0.3243, "step": 7850, "task_loss": 1.0577633380889893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8068167567253113, "epoch": 6.64, "learning_rate": 2.596304794106992e-06, "loss": 0.4757, "step": 7851, "task_loss": 0.6515446305274963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7462450265884399, "epoch": 6.64, "learning_rate": 2.5902668759811617e-06, "loss": 0.4472, "step": 7852, "task_loss": 0.7469035983085632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41302064061164856, "epoch": 6.64, "learning_rate": 2.5842289578553314e-06, "loss": 0.4616, "step": 7853, "task_loss": 0.4949371814727783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.20025980472564697, "epoch": 6.64, "learning_rate": 2.578191039729501e-06, "loss": 0.4748, "step": 7854, "task_loss": 0.035653483122587204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5265182256698608, "epoch": 6.64, "learning_rate": 2.5721531216036713e-06, "loss": 0.3891, "step": 7855, "task_loss": 0.9856874942779541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36393970251083374, "epoch": 6.64, "learning_rate": 2.566115203477841e-06, "loss": 0.5077, "step": 7856, "task_loss": 0.6945651173591614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2998431324958801, "epoch": 6.64, "learning_rate": 2.5600772853520107e-06, "loss": 0.3087, "step": 7857, "task_loss": 0.4087787866592407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3158137798309326, "epoch": 6.64, "learning_rate": 2.5540393672261805e-06, "loss": 0.3701, "step": 7858, "task_loss": 0.21478907763957977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3998490571975708, "epoch": 6.64, "learning_rate": 2.54800144910035e-06, "loss": 0.3526, "step": 7859, "task_loss": 0.6403995156288147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4730547368526459, "epoch": 6.64, "learning_rate": 2.5419635309745203e-06, "loss": 0.3591, "step": 7860, "task_loss": 0.8645583391189575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23492273688316345, "epoch": 6.64, "learning_rate": 2.53592561284869e-06, "loss": 0.3927, "step": 7861, "task_loss": 0.595885694026947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32106345891952515, "epoch": 6.65, "learning_rate": 2.5298876947228598e-06, "loss": 0.3829, "step": 7862, "task_loss": 0.16185112297534943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2626842260360718, "epoch": 6.65, "learning_rate": 2.5238497765970295e-06, "loss": 0.4081, "step": 7863, "task_loss": 0.8436007499694824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31590473651885986, "epoch": 6.65, "learning_rate": 2.5178118584711992e-06, "loss": 0.4463, "step": 7864, "task_loss": 0.24911530315876007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6417696475982666, "epoch": 6.65, "learning_rate": 2.5117739403453694e-06, "loss": 0.5727, "step": 7865, "task_loss": 1.059611201286316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4639134407043457, "epoch": 6.65, "learning_rate": 2.505736022219539e-06, "loss": 0.4842, "step": 7866, "task_loss": 0.3415990471839905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3123406767845154, "epoch": 6.65, "learning_rate": 2.499698104093709e-06, "loss": 0.3374, "step": 7867, "task_loss": 0.40217944979667664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26490211486816406, "epoch": 6.65, "learning_rate": 2.4936601859678785e-06, "loss": 0.3807, "step": 7868, "task_loss": 0.26621001958847046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2414810061454773, "epoch": 6.65, "learning_rate": 2.4876222678420483e-06, "loss": 0.409, "step": 7869, "task_loss": 0.32589191198349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23923486471176147, "epoch": 6.65, "learning_rate": 2.481584349716218e-06, "loss": 0.3267, "step": 7870, "task_loss": 0.6938450932502747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8443307280540466, "epoch": 6.65, "learning_rate": 2.4755464315903877e-06, "loss": 0.4717, "step": 7871, "task_loss": 1.4285906553268433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1790551245212555, "epoch": 6.65, "learning_rate": 2.4695085134645574e-06, "loss": 0.4403, "step": 7872, "task_loss": 0.43923547863960266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5515785813331604, "epoch": 6.65, "learning_rate": 2.463470595338727e-06, "loss": 0.4053, "step": 7873, "task_loss": 0.7410345673561096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2507975399494171, "epoch": 6.66, "learning_rate": 2.457432677212897e-06, "loss": 0.3361, "step": 7874, "task_loss": 0.20648400485515594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42468440532684326, "epoch": 6.66, "learning_rate": 2.4513947590870666e-06, "loss": 0.5302, "step": 7875, "task_loss": 1.0519031286239624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4347517192363739, "epoch": 6.66, "learning_rate": 2.4453568409612368e-06, "loss": 0.4273, "step": 7876, "task_loss": 0.12205415219068527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26051557064056396, "epoch": 6.66, "learning_rate": 2.4393189228354065e-06, "loss": 0.3449, "step": 7877, "task_loss": 0.865318775177002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.411740779876709, "epoch": 6.66, "learning_rate": 2.433281004709576e-06, "loss": 0.3478, "step": 7878, "task_loss": 0.42227035760879517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3249126970767975, "epoch": 6.66, "learning_rate": 2.427243086583746e-06, "loss": 0.4593, "step": 7879, "task_loss": 0.3463933765888214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.20440863072872162, "epoch": 6.66, "learning_rate": 2.4212051684579157e-06, "loss": 0.3155, "step": 7880, "task_loss": 0.3279677629470825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3153698146343231, "epoch": 6.66, "learning_rate": 2.415167250332086e-06, "loss": 0.4577, "step": 7881, "task_loss": 1.3202075958251953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3663857579231262, "epoch": 6.66, "learning_rate": 2.4091293322062555e-06, "loss": 0.4314, "step": 7882, "task_loss": 0.6951327323913574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2621501386165619, "epoch": 6.66, "learning_rate": 2.4030914140804253e-06, "loss": 0.353, "step": 7883, "task_loss": 0.3960483968257904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49510759115219116, "epoch": 6.66, "learning_rate": 2.397053495954595e-06, "loss": 0.3833, "step": 7884, "task_loss": 0.6981363892555237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3025045096874237, "epoch": 6.66, "learning_rate": 2.3910155778287647e-06, "loss": 0.4039, "step": 7885, "task_loss": 0.36626124382019043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5742518901824951, "epoch": 6.67, "learning_rate": 2.384977659702935e-06, "loss": 0.4343, "step": 7886, "task_loss": 0.556591808795929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5511546730995178, "epoch": 6.67, "learning_rate": 2.3789397415771046e-06, "loss": 0.4491, "step": 7887, "task_loss": 1.404227614402771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.233842134475708, "epoch": 6.67, "learning_rate": 2.3729018234512743e-06, "loss": 0.3665, "step": 7888, "task_loss": 0.615575909614563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5038156509399414, "epoch": 6.67, "learning_rate": 2.366863905325444e-06, "loss": 0.4828, "step": 7889, "task_loss": 1.5380604267120361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41283005475997925, "epoch": 6.67, "learning_rate": 2.3608259871996137e-06, "loss": 0.3422, "step": 7890, "task_loss": 0.675048291683197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.16099828481674194, "epoch": 6.67, "learning_rate": 2.3547880690737835e-06, "loss": 0.32, "step": 7891, "task_loss": 0.20258113741874695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5823311805725098, "epoch": 6.67, "learning_rate": 2.348750150947953e-06, "loss": 0.37, "step": 7892, "task_loss": 0.6660346388816833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.188491553068161, "epoch": 6.67, "learning_rate": 2.342712232822123e-06, "loss": 0.3943, "step": 7893, "task_loss": 0.4211389124393463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47819140553474426, "epoch": 6.67, "learning_rate": 2.3366743146962926e-06, "loss": 0.5128, "step": 7894, "task_loss": 1.080166220664978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2788325846195221, "epoch": 6.67, "learning_rate": 2.3306363965704624e-06, "loss": 0.3076, "step": 7895, "task_loss": 0.12697818875312805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.16821561753749847, "epoch": 6.67, "learning_rate": 2.324598478444632e-06, "loss": 0.3315, "step": 7896, "task_loss": 0.5368017554283142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4878840446472168, "epoch": 6.67, "learning_rate": 2.3185605603188022e-06, "loss": 0.4428, "step": 7897, "task_loss": 1.1241984367370605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22706350684165955, "epoch": 6.68, "learning_rate": 2.312522642192972e-06, "loss": 0.4323, "step": 7898, "task_loss": 0.4268385171890259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25211775302886963, "epoch": 6.68, "learning_rate": 2.3064847240671417e-06, "loss": 0.3841, "step": 7899, "task_loss": 0.47959211468696594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33337458968162537, "epoch": 6.68, "learning_rate": 2.3004468059413114e-06, "loss": 0.5069, "step": 7900, "task_loss": 0.19119007885456085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30133315920829773, "epoch": 6.68, "learning_rate": 2.294408887815481e-06, "loss": 0.4271, "step": 7901, "task_loss": 0.8916338682174683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37784767150878906, "epoch": 6.68, "learning_rate": 2.2883709696896513e-06, "loss": 0.4181, "step": 7902, "task_loss": 0.9826608896255493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26956483721733093, "epoch": 6.68, "learning_rate": 2.282333051563821e-06, "loss": 0.3216, "step": 7903, "task_loss": 0.62283855676651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24518194794654846, "epoch": 6.68, "learning_rate": 2.2762951334379907e-06, "loss": 0.4202, "step": 7904, "task_loss": 1.2418737411499023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23428012430667877, "epoch": 6.68, "learning_rate": 2.2702572153121605e-06, "loss": 0.4165, "step": 7905, "task_loss": 0.29500505328178406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5334722995758057, "epoch": 6.68, "learning_rate": 2.26421929718633e-06, "loss": 0.4945, "step": 7906, "task_loss": 0.3435673117637634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.326000452041626, "epoch": 6.68, "learning_rate": 2.2581813790605003e-06, "loss": 0.4026, "step": 7907, "task_loss": 0.2820214033126831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46510452032089233, "epoch": 6.68, "learning_rate": 2.25214346093467e-06, "loss": 0.4793, "step": 7908, "task_loss": 0.37099313735961914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.16066010296344757, "epoch": 6.69, "learning_rate": 2.2461055428088398e-06, "loss": 0.359, "step": 7909, "task_loss": 0.028501689434051514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6241548657417297, "epoch": 6.69, "learning_rate": 2.2400676246830095e-06, "loss": 0.3806, "step": 7910, "task_loss": 1.0853996276855469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25910311937332153, "epoch": 6.69, "learning_rate": 2.2340297065571792e-06, "loss": 0.3262, "step": 7911, "task_loss": 0.05178667977452278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32086479663848877, "epoch": 6.69, "learning_rate": 2.227991788431349e-06, "loss": 0.3427, "step": 7912, "task_loss": 0.4692910611629486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2468743473291397, "epoch": 6.69, "learning_rate": 2.2219538703055187e-06, "loss": 0.3925, "step": 7913, "task_loss": 0.7566897869110107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5782186985015869, "epoch": 6.69, "learning_rate": 2.2159159521796884e-06, "loss": 0.447, "step": 7914, "task_loss": 0.6278772950172424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8625306487083435, "epoch": 6.69, "learning_rate": 2.209878034053858e-06, "loss": 0.524, "step": 7915, "task_loss": 1.9267921447753906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26600950956344604, "epoch": 6.69, "learning_rate": 2.203840115928028e-06, "loss": 0.3964, "step": 7916, "task_loss": 0.30941513180732727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40066784620285034, "epoch": 6.69, "learning_rate": 2.197802197802198e-06, "loss": 0.4589, "step": 7917, "task_loss": 0.3184880018234253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28865593671798706, "epoch": 6.69, "learning_rate": 2.1917642796763677e-06, "loss": 0.4239, "step": 7918, "task_loss": 0.045572374016046524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.557461678981781, "epoch": 6.69, "learning_rate": 2.1857263615505374e-06, "loss": 0.3945, "step": 7919, "task_loss": 0.2711033225059509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.289492666721344, "epoch": 6.69, "learning_rate": 2.179688443424707e-06, "loss": 0.3761, "step": 7920, "task_loss": 0.39926058053970337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5150561928749084, "epoch": 6.7, "learning_rate": 2.173650525298877e-06, "loss": 0.4371, "step": 7921, "task_loss": 0.9533637762069702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.515261173248291, "epoch": 6.7, "learning_rate": 2.1676126071730466e-06, "loss": 0.5923, "step": 7922, "task_loss": 0.43281084299087524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7919328808784485, "epoch": 6.7, "learning_rate": 2.1615746890472168e-06, "loss": 0.5064, "step": 7923, "task_loss": 0.4821922183036804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30817073583602905, "epoch": 6.7, "learning_rate": 2.1555367709213865e-06, "loss": 0.3927, "step": 7924, "task_loss": 0.4096126854419708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30237463116645813, "epoch": 6.7, "learning_rate": 2.149498852795556e-06, "loss": 0.442, "step": 7925, "task_loss": 0.5884860157966614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3515297770500183, "epoch": 6.7, "learning_rate": 2.143460934669726e-06, "loss": 0.3581, "step": 7926, "task_loss": 0.3662824034690857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4089769423007965, "epoch": 6.7, "learning_rate": 2.1374230165438957e-06, "loss": 0.3159, "step": 7927, "task_loss": 0.5543223023414612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5224348306655884, "epoch": 6.7, "learning_rate": 2.131385098418066e-06, "loss": 0.4309, "step": 7928, "task_loss": 0.9393535256385803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33549395203590393, "epoch": 6.7, "learning_rate": 2.1253471802922355e-06, "loss": 0.3378, "step": 7929, "task_loss": 0.2982596755027771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5055288076400757, "epoch": 6.7, "learning_rate": 2.1193092621664052e-06, "loss": 0.468, "step": 7930, "task_loss": 0.7529047727584839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24795804917812347, "epoch": 6.7, "learning_rate": 2.113271344040575e-06, "loss": 0.4143, "step": 7931, "task_loss": 0.13124839961528778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2998420298099518, "epoch": 6.7, "learning_rate": 2.1072334259147447e-06, "loss": 0.4433, "step": 7932, "task_loss": 0.2126983106136322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27831244468688965, "epoch": 6.71, "learning_rate": 2.101195507788915e-06, "loss": 0.361, "step": 7933, "task_loss": 0.8542847633361816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3746930658817291, "epoch": 6.71, "learning_rate": 2.0951575896630846e-06, "loss": 0.385, "step": 7934, "task_loss": 0.4437871277332306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5596070885658264, "epoch": 6.71, "learning_rate": 2.089119671537254e-06, "loss": 0.4489, "step": 7935, "task_loss": 0.7102393507957458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3555496335029602, "epoch": 6.71, "learning_rate": 2.0830817534114236e-06, "loss": 0.4421, "step": 7936, "task_loss": 0.76910400390625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3050389587879181, "epoch": 6.71, "learning_rate": 2.0770438352855933e-06, "loss": 0.3188, "step": 7937, "task_loss": 0.7956819534301758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24922674894332886, "epoch": 6.71, "learning_rate": 2.0710059171597635e-06, "loss": 0.3186, "step": 7938, "task_loss": 0.176559180021286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4000927805900574, "epoch": 6.71, "learning_rate": 2.064967999033933e-06, "loss": 0.3941, "step": 7939, "task_loss": 0.9128929972648621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5999336242675781, "epoch": 6.71, "learning_rate": 2.058930080908103e-06, "loss": 0.5608, "step": 7940, "task_loss": 1.114283800125122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5452135801315308, "epoch": 6.71, "learning_rate": 2.0528921627822726e-06, "loss": 0.4706, "step": 7941, "task_loss": 0.6370884776115417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3479295074939728, "epoch": 6.71, "learning_rate": 2.0468542446564424e-06, "loss": 0.2823, "step": 7942, "task_loss": 0.42150014638900757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5642030239105225, "epoch": 6.71, "learning_rate": 2.040816326530612e-06, "loss": 0.3913, "step": 7943, "task_loss": 0.7624131441116333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2225511074066162, "epoch": 6.71, "learning_rate": 2.0347784084047822e-06, "loss": 0.3364, "step": 7944, "task_loss": 0.4228188693523407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2807016372680664, "epoch": 6.72, "learning_rate": 2.028740490278952e-06, "loss": 0.4071, "step": 7945, "task_loss": 0.8647692799568176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38694629073143005, "epoch": 6.72, "learning_rate": 2.0227025721531217e-06, "loss": 0.3648, "step": 7946, "task_loss": 1.0911650657653809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1855526566505432, "epoch": 6.72, "learning_rate": 2.0166646540272914e-06, "loss": 0.3738, "step": 7947, "task_loss": 0.3385108411312103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5334384441375732, "epoch": 6.72, "learning_rate": 2.010626735901461e-06, "loss": 0.4576, "step": 7948, "task_loss": 1.1585549116134644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22523944079875946, "epoch": 6.72, "learning_rate": 2.0045888177756313e-06, "loss": 0.2361, "step": 7949, "task_loss": 0.5896918773651123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48542869091033936, "epoch": 6.72, "learning_rate": 1.998550899649801e-06, "loss": 0.4754, "step": 7950, "task_loss": 0.5891206860542297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5120091438293457, "epoch": 6.72, "learning_rate": 1.9925129815239707e-06, "loss": 0.4674, "step": 7951, "task_loss": 0.7120363712310791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7795814275741577, "epoch": 6.72, "learning_rate": 1.9864750633981404e-06, "loss": 0.4468, "step": 7952, "task_loss": 0.713590145111084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3642672598361969, "epoch": 6.72, "learning_rate": 1.98043714527231e-06, "loss": 0.467, "step": 7953, "task_loss": 0.0375012643635273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2871781289577484, "epoch": 6.72, "learning_rate": 1.9743992271464803e-06, "loss": 0.3489, "step": 7954, "task_loss": 0.506109356880188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37487345933914185, "epoch": 6.72, "learning_rate": 1.96836130902065e-06, "loss": 0.4275, "step": 7955, "task_loss": 0.8948546648025513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3117806017398834, "epoch": 6.72, "learning_rate": 1.9623233908948198e-06, "loss": 0.4144, "step": 7956, "task_loss": 0.572005569934845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3016677498817444, "epoch": 6.73, "learning_rate": 1.9562854727689895e-06, "loss": 0.4523, "step": 7957, "task_loss": 0.22136709094047546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43804067373275757, "epoch": 6.73, "learning_rate": 1.9502475546431592e-06, "loss": 0.3946, "step": 7958, "task_loss": 0.820746123790741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6419487595558167, "epoch": 6.73, "learning_rate": 1.944209636517329e-06, "loss": 0.3734, "step": 7959, "task_loss": 0.8261976838111877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4066256880760193, "epoch": 6.73, "learning_rate": 1.9381717183914987e-06, "loss": 0.3644, "step": 7960, "task_loss": 0.26412928104400635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40790605545043945, "epoch": 6.73, "learning_rate": 1.9321338002656684e-06, "loss": 0.3824, "step": 7961, "task_loss": 0.31495723128318787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3960433304309845, "epoch": 6.73, "learning_rate": 1.926095882139838e-06, "loss": 0.5451, "step": 7962, "task_loss": 0.10692081600427628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28375244140625, "epoch": 6.73, "learning_rate": 1.920057964014008e-06, "loss": 0.2544, "step": 7963, "task_loss": 0.316921591758728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5003267526626587, "epoch": 6.73, "learning_rate": 1.9140200458881776e-06, "loss": 0.3621, "step": 7964, "task_loss": 0.12720170617103577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.14777764678001404, "epoch": 6.73, "learning_rate": 1.9079821277623477e-06, "loss": 0.3467, "step": 7965, "task_loss": 0.2674954831600189 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2301875352859497, "epoch": 6.73, "learning_rate": 1.9019442096365174e-06, "loss": 0.4033, "step": 7966, "task_loss": 0.3096529543399811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44599300622940063, "epoch": 6.73, "learning_rate": 1.8959062915106872e-06, "loss": 0.3165, "step": 7967, "task_loss": 0.6506139636039734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39861592650413513, "epoch": 6.73, "learning_rate": 1.8898683733848569e-06, "loss": 0.545, "step": 7968, "task_loss": 1.3597209453582764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43565884232521057, "epoch": 6.74, "learning_rate": 1.8838304552590266e-06, "loss": 0.3441, "step": 7969, "task_loss": 0.23760254681110382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5121496915817261, "epoch": 6.74, "learning_rate": 1.8777925371331968e-06, "loss": 0.5822, "step": 7970, "task_loss": 1.17240309715271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3862224519252777, "epoch": 6.74, "learning_rate": 1.8717546190073665e-06, "loss": 0.4281, "step": 7971, "task_loss": 0.5243268013000488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29804879426956177, "epoch": 6.74, "learning_rate": 1.8657167008815362e-06, "loss": 0.4381, "step": 7972, "task_loss": 0.389913946390152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40790337324142456, "epoch": 6.74, "learning_rate": 1.859678782755706e-06, "loss": 0.3972, "step": 7973, "task_loss": 0.9472525119781494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4372307062149048, "epoch": 6.74, "learning_rate": 1.8536408646298756e-06, "loss": 0.5798, "step": 7974, "task_loss": 0.7561575770378113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42009615898132324, "epoch": 6.74, "learning_rate": 1.8476029465040456e-06, "loss": 0.5174, "step": 7975, "task_loss": 0.16192148625850677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30770349502563477, "epoch": 6.74, "learning_rate": 1.8415650283782153e-06, "loss": 0.3667, "step": 7976, "task_loss": 0.5296329855918884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29661431908607483, "epoch": 6.74, "learning_rate": 1.835527110252385e-06, "loss": 0.4037, "step": 7977, "task_loss": 0.2183021605014801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5738179683685303, "epoch": 6.74, "learning_rate": 1.8294891921265548e-06, "loss": 0.4403, "step": 7978, "task_loss": 0.5755680799484253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3108544945716858, "epoch": 6.74, "learning_rate": 1.8234512740007245e-06, "loss": 0.5109, "step": 7979, "task_loss": 1.4009648561477661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42050036787986755, "epoch": 6.75, "learning_rate": 1.8174133558748946e-06, "loss": 0.4314, "step": 7980, "task_loss": 0.6934286952018738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36310625076293945, "epoch": 6.75, "learning_rate": 1.8113754377490644e-06, "loss": 0.4216, "step": 7981, "task_loss": 0.49055513739585876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4886122941970825, "epoch": 6.75, "learning_rate": 1.805337519623234e-06, "loss": 0.4387, "step": 7982, "task_loss": 0.7535020112991333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.35759294033050537, "epoch": 6.75, "learning_rate": 1.7992996014974038e-06, "loss": 0.5487, "step": 7983, "task_loss": 0.9826281666755676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33584505319595337, "epoch": 6.75, "learning_rate": 1.7932616833715735e-06, "loss": 0.4077, "step": 7984, "task_loss": 0.16873866319656372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2501624524593353, "epoch": 6.75, "learning_rate": 1.7872237652457432e-06, "loss": 0.4212, "step": 7985, "task_loss": 0.3186874985694885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2867826819419861, "epoch": 6.75, "learning_rate": 1.7811858471199132e-06, "loss": 0.3232, "step": 7986, "task_loss": 0.4423218071460724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.18785551190376282, "epoch": 6.75, "learning_rate": 1.775147928994083e-06, "loss": 0.4489, "step": 7987, "task_loss": 0.4902009665966034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5768045783042908, "epoch": 6.75, "learning_rate": 1.7691100108682526e-06, "loss": 0.4623, "step": 7988, "task_loss": 0.5507065653800964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3763067126274109, "epoch": 6.75, "learning_rate": 1.7630720927424224e-06, "loss": 0.4211, "step": 7989, "task_loss": 0.5409693121910095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44266730546951294, "epoch": 6.75, "learning_rate": 1.757034174616592e-06, "loss": 0.4566, "step": 7990, "task_loss": 0.7320343852043152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4275152385234833, "epoch": 6.75, "learning_rate": 1.7509962564907622e-06, "loss": 0.4532, "step": 7991, "task_loss": 0.37711501121520996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4945535361766815, "epoch": 6.76, "learning_rate": 1.744958338364932e-06, "loss": 0.3783, "step": 7992, "task_loss": 0.5126373171806335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31805723905563354, "epoch": 6.76, "learning_rate": 1.7389204202391017e-06, "loss": 0.3649, "step": 7993, "task_loss": 0.20909534394741058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1315464973449707, "epoch": 6.76, "learning_rate": 1.7328825021132714e-06, "loss": 0.3118, "step": 7994, "task_loss": 0.01028872188180685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5303727388381958, "epoch": 6.76, "learning_rate": 1.7268445839874411e-06, "loss": 0.3717, "step": 7995, "task_loss": 1.2639329433441162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4363662004470825, "epoch": 6.76, "learning_rate": 1.720806665861611e-06, "loss": 0.3448, "step": 7996, "task_loss": 1.200205683708191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4677465558052063, "epoch": 6.76, "learning_rate": 1.7147687477357808e-06, "loss": 0.4801, "step": 7997, "task_loss": 1.2612807750701904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6155959367752075, "epoch": 6.76, "learning_rate": 1.7087308296099505e-06, "loss": 0.3818, "step": 7998, "task_loss": 0.9973390698432922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6081398725509644, "epoch": 6.76, "learning_rate": 1.7026929114841202e-06, "loss": 0.4829, "step": 7999, "task_loss": 0.4856477975845337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3179943561553955, "epoch": 6.76, "learning_rate": 1.69665499335829e-06, "loss": 0.452, "step": 8000, "task_loss": 0.6029680967330933 }, { "epoch": 6.76, "eval_accuracy": 0.9126732673267327, "eval_loss": 0.26727527379989624, "eval_runtime": 226.8414, "eval_samples_per_second": 111.311, "eval_steps_per_second": 0.873, "step": 8000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36752182245254517, "epoch": 6.76, "learning_rate": 1.69061707523246e-06, "loss": 0.5529, "step": 8001, "task_loss": 0.12076438218355179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37279212474823, "epoch": 6.76, "learning_rate": 1.6845791571066298e-06, "loss": 0.3141, "step": 8002, "task_loss": 0.38453209400177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7577190399169922, "epoch": 6.76, "learning_rate": 1.6785412389807996e-06, "loss": 0.5044, "step": 8003, "task_loss": 0.9730449318885803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2961850166320801, "epoch": 6.77, "learning_rate": 1.6725033208549693e-06, "loss": 0.3412, "step": 8004, "task_loss": 0.5795434713363647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5098868608474731, "epoch": 6.77, "learning_rate": 1.666465402729139e-06, "loss": 0.4377, "step": 8005, "task_loss": 1.1716830730438232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4118422865867615, "epoch": 6.77, "learning_rate": 1.6604274846033087e-06, "loss": 0.5184, "step": 8006, "task_loss": 1.0460752248764038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21092408895492554, "epoch": 6.77, "learning_rate": 1.6543895664774787e-06, "loss": 0.3966, "step": 8007, "task_loss": 0.5367329716682434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4523795247077942, "epoch": 6.77, "learning_rate": 1.6483516483516484e-06, "loss": 0.3369, "step": 8008, "task_loss": 0.43254226446151733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3948954939842224, "epoch": 6.77, "learning_rate": 1.6423137302258181e-06, "loss": 0.4104, "step": 8009, "task_loss": 0.3960741460323334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.30564191937446594, "epoch": 6.77, "learning_rate": 1.6362758120999878e-06, "loss": 0.4818, "step": 8010, "task_loss": 0.3475039601325989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28640469908714294, "epoch": 6.77, "learning_rate": 1.6302378939741576e-06, "loss": 0.4139, "step": 8011, "task_loss": 0.6030334830284119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45724889636039734, "epoch": 6.77, "learning_rate": 1.6241999758483277e-06, "loss": 0.4217, "step": 8012, "task_loss": 1.0668278932571411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3608822822570801, "epoch": 6.77, "learning_rate": 1.6181620577224974e-06, "loss": 0.6165, "step": 8013, "task_loss": 0.6803489923477173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4502766728401184, "epoch": 6.77, "learning_rate": 1.6121241395966672e-06, "loss": 0.3513, "step": 8014, "task_loss": 0.8112562894821167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23649826645851135, "epoch": 6.77, "learning_rate": 1.6060862214708369e-06, "loss": 0.4597, "step": 8015, "task_loss": 0.3836895525455475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.9106251001358032, "epoch": 6.78, "learning_rate": 1.6000483033450066e-06, "loss": 0.4863, "step": 8016, "task_loss": 0.8019269704818726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5103771090507507, "epoch": 6.78, "learning_rate": 1.5940103852191767e-06, "loss": 0.5116, "step": 8017, "task_loss": 0.42284879088401794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3947240114212036, "epoch": 6.78, "learning_rate": 1.5879724670933463e-06, "loss": 0.383, "step": 8018, "task_loss": 1.3024839162826538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6466600298881531, "epoch": 6.78, "learning_rate": 1.581934548967516e-06, "loss": 0.4831, "step": 8019, "task_loss": 0.44638004899024963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4026990830898285, "epoch": 6.78, "learning_rate": 1.5758966308416857e-06, "loss": 0.5114, "step": 8020, "task_loss": 0.7961933612823486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5287365913391113, "epoch": 6.78, "learning_rate": 1.5698587127158554e-06, "loss": 0.5876, "step": 8021, "task_loss": 0.8645148277282715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4717355966567993, "epoch": 6.78, "learning_rate": 1.5638207945900256e-06, "loss": 0.5019, "step": 8022, "task_loss": 0.435039222240448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2992399334907532, "epoch": 6.78, "learning_rate": 1.557782876464195e-06, "loss": 0.3725, "step": 8023, "task_loss": 0.14062321186065674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2429455816745758, "epoch": 6.78, "learning_rate": 1.551744958338365e-06, "loss": 0.2931, "step": 8024, "task_loss": 0.5170424580574036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32316386699676514, "epoch": 6.78, "learning_rate": 1.5457070402125348e-06, "loss": 0.4101, "step": 8025, "task_loss": 0.7490684390068054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31373485922813416, "epoch": 6.78, "learning_rate": 1.5396691220867047e-06, "loss": 0.3198, "step": 8026, "task_loss": 0.4116826057434082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5458649396896362, "epoch": 6.78, "learning_rate": 1.5336312039608744e-06, "loss": 0.4077, "step": 8027, "task_loss": 0.09386246651411057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29614460468292236, "epoch": 6.79, "learning_rate": 1.5275932858350441e-06, "loss": 0.4112, "step": 8028, "task_loss": 0.5199512839317322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38036584854125977, "epoch": 6.79, "learning_rate": 1.521555367709214e-06, "loss": 0.3372, "step": 8029, "task_loss": 0.9518170356750488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24982276558876038, "epoch": 6.79, "learning_rate": 1.5155174495833838e-06, "loss": 0.3111, "step": 8030, "task_loss": 0.31936317682266235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38876450061798096, "epoch": 6.79, "learning_rate": 1.5094795314575535e-06, "loss": 0.4135, "step": 8031, "task_loss": 0.4519073963165283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22202223539352417, "epoch": 6.79, "learning_rate": 1.5034416133317232e-06, "loss": 0.3854, "step": 8032, "task_loss": 0.7422440648078918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27680790424346924, "epoch": 6.79, "learning_rate": 1.497403695205893e-06, "loss": 0.4041, "step": 8033, "task_loss": 0.40240928530693054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2869625389575958, "epoch": 6.79, "learning_rate": 1.491365777080063e-06, "loss": 0.3567, "step": 8034, "task_loss": 0.11391927301883698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22488252818584442, "epoch": 6.79, "learning_rate": 1.4853278589542326e-06, "loss": 0.3181, "step": 8035, "task_loss": 0.6639074087142944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5034712553024292, "epoch": 6.79, "learning_rate": 1.4792899408284024e-06, "loss": 0.5009, "step": 8036, "task_loss": 0.9051762819290161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4290744662284851, "epoch": 6.79, "learning_rate": 1.4732520227025723e-06, "loss": 0.4564, "step": 8037, "task_loss": 0.4157385230064392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7050086259841919, "epoch": 6.79, "learning_rate": 1.467214104576742e-06, "loss": 0.6109, "step": 8038, "task_loss": 0.4410955607891083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3011908531188965, "epoch": 6.79, "learning_rate": 1.461176186450912e-06, "loss": 0.4057, "step": 8039, "task_loss": 0.934240996837616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4592078924179077, "epoch": 6.8, "learning_rate": 1.4551382683250817e-06, "loss": 0.4102, "step": 8040, "task_loss": 0.4647703468799591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3786677122116089, "epoch": 6.8, "learning_rate": 1.4491003501992514e-06, "loss": 0.4414, "step": 8041, "task_loss": 0.3575001060962677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2954862117767334, "epoch": 6.8, "learning_rate": 1.4430624320734211e-06, "loss": 0.4506, "step": 8042, "task_loss": 0.46702656149864197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26899415254592896, "epoch": 6.8, "learning_rate": 1.4370245139475908e-06, "loss": 0.3475, "step": 8043, "task_loss": 0.09749466925859451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3965744376182556, "epoch": 6.8, "learning_rate": 1.4309865958217608e-06, "loss": 0.3893, "step": 8044, "task_loss": 0.43775674700737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31000733375549316, "epoch": 6.8, "learning_rate": 1.4249486776959305e-06, "loss": 0.342, "step": 8045, "task_loss": 0.5138819813728333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5599683523178101, "epoch": 6.8, "learning_rate": 1.4189107595701002e-06, "loss": 0.4317, "step": 8046, "task_loss": 0.7945274710655212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3637505769729614, "epoch": 6.8, "learning_rate": 1.4128728414442702e-06, "loss": 0.3347, "step": 8047, "task_loss": 0.16970522701740265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4931928515434265, "epoch": 6.8, "learning_rate": 1.4068349233184399e-06, "loss": 0.4541, "step": 8048, "task_loss": 0.2994762659072876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3583560585975647, "epoch": 6.8, "learning_rate": 1.4007970051926096e-06, "loss": 0.3454, "step": 8049, "task_loss": 0.16286805272102356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2832902669906616, "epoch": 6.8, "learning_rate": 1.3947590870667795e-06, "loss": 0.3906, "step": 8050, "task_loss": 0.6985470056533813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3191336393356323, "epoch": 6.81, "learning_rate": 1.3887211689409493e-06, "loss": 0.308, "step": 8051, "task_loss": 0.2285272777080536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5420382618904114, "epoch": 6.81, "learning_rate": 1.382683250815119e-06, "loss": 0.4478, "step": 8052, "task_loss": 0.9865618944168091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5874152183532715, "epoch": 6.81, "learning_rate": 1.3766453326892887e-06, "loss": 0.3559, "step": 8053, "task_loss": 0.20289303362369537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23840884864330292, "epoch": 6.81, "learning_rate": 1.3706074145634584e-06, "loss": 0.3416, "step": 8054, "task_loss": 0.36507004499435425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34686076641082764, "epoch": 6.81, "learning_rate": 1.3645694964376284e-06, "loss": 0.3334, "step": 8055, "task_loss": 0.8852087259292603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23420071601867676, "epoch": 6.81, "learning_rate": 1.358531578311798e-06, "loss": 0.3152, "step": 8056, "task_loss": 0.6890289187431335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5600628852844238, "epoch": 6.81, "learning_rate": 1.3524936601859678e-06, "loss": 0.5092, "step": 8057, "task_loss": 1.6099399328231812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4027302861213684, "epoch": 6.81, "learning_rate": 1.3464557420601378e-06, "loss": 0.3917, "step": 8058, "task_loss": 0.6434075236320496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3910123109817505, "epoch": 6.81, "learning_rate": 1.3404178239343075e-06, "loss": 0.5619, "step": 8059, "task_loss": 0.2556496560573578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6062817573547363, "epoch": 6.81, "learning_rate": 1.3343799058084774e-06, "loss": 0.5598, "step": 8060, "task_loss": 0.3176896870136261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31638848781585693, "epoch": 6.81, "learning_rate": 1.3283419876826471e-06, "loss": 0.3816, "step": 8061, "task_loss": 0.24318234622478485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4265042543411255, "epoch": 6.81, "learning_rate": 1.3223040695568169e-06, "loss": 0.4708, "step": 8062, "task_loss": 0.4662654995918274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37705564498901367, "epoch": 6.82, "learning_rate": 1.3162661514309866e-06, "loss": 0.4495, "step": 8063, "task_loss": 0.5173128843307495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3848232328891754, "epoch": 6.82, "learning_rate": 1.3102282333051563e-06, "loss": 0.325, "step": 8064, "task_loss": 0.4820360541343689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.516701340675354, "epoch": 6.82, "learning_rate": 1.3041903151793263e-06, "loss": 0.4259, "step": 8065, "task_loss": 0.41348904371261597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44096285104751587, "epoch": 6.82, "learning_rate": 1.298152397053496e-06, "loss": 0.3127, "step": 8066, "task_loss": 0.22364304959774017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3346620202064514, "epoch": 6.82, "learning_rate": 1.2921144789276657e-06, "loss": 0.464, "step": 8067, "task_loss": 0.9475773572921753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4296698570251465, "epoch": 6.82, "learning_rate": 1.2860765608018356e-06, "loss": 0.4578, "step": 8068, "task_loss": 1.1858670711517334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36888259649276733, "epoch": 6.82, "learning_rate": 1.2800386426760054e-06, "loss": 0.4407, "step": 8069, "task_loss": 0.6773166060447693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4725002348423004, "epoch": 6.82, "learning_rate": 1.274000724550175e-06, "loss": 0.5492, "step": 8070, "task_loss": 0.5123980045318604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6548303365707397, "epoch": 6.82, "learning_rate": 1.267962806424345e-06, "loss": 0.5271, "step": 8071, "task_loss": 0.7471229434013367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4654304087162018, "epoch": 6.82, "learning_rate": 1.2619248882985147e-06, "loss": 0.3401, "step": 8072, "task_loss": 0.46453535556793213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43890371918678284, "epoch": 6.82, "learning_rate": 1.2558869701726847e-06, "loss": 0.3421, "step": 8073, "task_loss": 1.2718652486801147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4536915123462677, "epoch": 6.82, "learning_rate": 1.2498490520468544e-06, "loss": 0.428, "step": 8074, "task_loss": 1.1045451164245605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.15460707247257233, "epoch": 6.83, "learning_rate": 1.2438111339210241e-06, "loss": 0.3916, "step": 8075, "task_loss": 0.1299423724412918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3087708353996277, "epoch": 6.83, "learning_rate": 1.2377732157951939e-06, "loss": 0.3365, "step": 8076, "task_loss": 0.21000158786773682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3338640630245209, "epoch": 6.83, "learning_rate": 1.2317352976693636e-06, "loss": 0.3898, "step": 8077, "task_loss": 0.5585970282554626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.24771705269813538, "epoch": 6.83, "learning_rate": 1.2256973795435333e-06, "loss": 0.3262, "step": 8078, "task_loss": 0.24024909734725952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29337120056152344, "epoch": 6.83, "learning_rate": 1.2196594614177032e-06, "loss": 0.291, "step": 8079, "task_loss": 0.7006511092185974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4343748092651367, "epoch": 6.83, "learning_rate": 1.213621543291873e-06, "loss": 0.424, "step": 8080, "task_loss": 1.0463024377822876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32036271691322327, "epoch": 6.83, "learning_rate": 1.207583625166043e-06, "loss": 0.3982, "step": 8081, "task_loss": 0.6397793889045715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25133317708969116, "epoch": 6.83, "learning_rate": 1.2015457070402126e-06, "loss": 0.4583, "step": 8082, "task_loss": 0.07132356613874435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3481224477291107, "epoch": 6.83, "learning_rate": 1.1955077889143823e-06, "loss": 0.349, "step": 8083, "task_loss": 1.3154499530792236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3263162672519684, "epoch": 6.83, "learning_rate": 1.1894698707885523e-06, "loss": 0.4042, "step": 8084, "task_loss": 0.8532785177230835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4737127125263214, "epoch": 6.83, "learning_rate": 1.183431952662722e-06, "loss": 0.5398, "step": 8085, "task_loss": 0.5222520232200623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38231703639030457, "epoch": 6.83, "learning_rate": 1.1773940345368917e-06, "loss": 0.3944, "step": 8086, "task_loss": 0.1516355574131012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5626338124275208, "epoch": 6.84, "learning_rate": 1.1713561164110615e-06, "loss": 0.4744, "step": 8087, "task_loss": 0.3940061032772064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2515054941177368, "epoch": 6.84, "learning_rate": 1.1653181982852312e-06, "loss": 0.3411, "step": 8088, "task_loss": 0.19618873298168182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5854541063308716, "epoch": 6.84, "learning_rate": 1.1592802801594011e-06, "loss": 0.4218, "step": 8089, "task_loss": 0.4840226471424103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2881999909877777, "epoch": 6.84, "learning_rate": 1.1532423620335708e-06, "loss": 0.3697, "step": 8090, "task_loss": 0.35623180866241455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4555911421775818, "epoch": 6.84, "learning_rate": 1.1472044439077406e-06, "loss": 0.428, "step": 8091, "task_loss": 0.6215843558311462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3905564546585083, "epoch": 6.84, "learning_rate": 1.1411665257819105e-06, "loss": 0.4456, "step": 8092, "task_loss": 1.0606939792633057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42606687545776367, "epoch": 6.84, "learning_rate": 1.1351286076560802e-06, "loss": 0.4215, "step": 8093, "task_loss": 1.0449498891830444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.16538885235786438, "epoch": 6.84, "learning_rate": 1.1290906895302502e-06, "loss": 0.3765, "step": 8094, "task_loss": 0.1329883337020874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5121326446533203, "epoch": 6.84, "learning_rate": 1.1230527714044199e-06, "loss": 0.3908, "step": 8095, "task_loss": 0.37612733244895935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40790823101997375, "epoch": 6.84, "learning_rate": 1.1170148532785896e-06, "loss": 0.3418, "step": 8096, "task_loss": 0.6417520642280579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.31515976786613464, "epoch": 6.84, "learning_rate": 1.1109769351527593e-06, "loss": 0.3775, "step": 8097, "task_loss": 0.4525148272514343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3336635231971741, "epoch": 6.84, "learning_rate": 1.104939017026929e-06, "loss": 0.4608, "step": 8098, "task_loss": 0.6153085231781006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2598334848880768, "epoch": 6.85, "learning_rate": 1.098901098901099e-06, "loss": 0.3469, "step": 8099, "task_loss": 0.24573233723640442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6087498068809509, "epoch": 6.85, "learning_rate": 1.0928631807752687e-06, "loss": 0.5024, "step": 8100, "task_loss": 0.431217223405838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.331132173538208, "epoch": 6.85, "learning_rate": 1.0868252626494384e-06, "loss": 0.4748, "step": 8101, "task_loss": 0.6218997836112976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34530937671661377, "epoch": 6.85, "learning_rate": 1.0807873445236084e-06, "loss": 0.4111, "step": 8102, "task_loss": 0.14589296281337738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5008348822593689, "epoch": 6.85, "learning_rate": 1.074749426397778e-06, "loss": 0.4975, "step": 8103, "task_loss": 0.5618547201156616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.276627779006958, "epoch": 6.85, "learning_rate": 1.0687115082719478e-06, "loss": 0.3837, "step": 8104, "task_loss": 0.20287364721298218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3788642883300781, "epoch": 6.85, "learning_rate": 1.0626735901461178e-06, "loss": 0.3701, "step": 8105, "task_loss": 0.4977553188800812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4012174904346466, "epoch": 6.85, "learning_rate": 1.0566356720202875e-06, "loss": 0.3504, "step": 8106, "task_loss": 0.8576327562332153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3681253492832184, "epoch": 6.85, "learning_rate": 1.0505977538944574e-06, "loss": 0.3502, "step": 8107, "task_loss": 2.7194690704345703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.27520355582237244, "epoch": 6.85, "learning_rate": 1.044559835768627e-06, "loss": 0.4021, "step": 8108, "task_loss": 0.5127708315849304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43381834030151367, "epoch": 6.85, "learning_rate": 1.0385219176427967e-06, "loss": 0.4592, "step": 8109, "task_loss": 0.41394490003585815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34065431356430054, "epoch": 6.85, "learning_rate": 1.0324839995169666e-06, "loss": 0.4351, "step": 8110, "task_loss": 0.9003871083259583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41222646832466125, "epoch": 6.86, "learning_rate": 1.0264460813911363e-06, "loss": 0.3657, "step": 8111, "task_loss": 0.9168933033943176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.18176651000976562, "epoch": 6.86, "learning_rate": 1.020408163265306e-06, "loss": 0.2647, "step": 8112, "task_loss": 0.2734471559524536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28752341866493225, "epoch": 6.86, "learning_rate": 1.014370245139476e-06, "loss": 0.3991, "step": 8113, "task_loss": 0.5798923373222351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.287160187959671, "epoch": 6.86, "learning_rate": 1.0083323270136457e-06, "loss": 0.4789, "step": 8114, "task_loss": 0.6882603764533997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5609266757965088, "epoch": 6.86, "learning_rate": 1.0022944088878156e-06, "loss": 0.4088, "step": 8115, "task_loss": 0.9643489122390747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39026910066604614, "epoch": 6.86, "learning_rate": 9.962564907619854e-07, "loss": 0.4199, "step": 8116, "task_loss": 0.1604255735874176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3149331212043762, "epoch": 6.86, "learning_rate": 9.90218572636155e-07, "loss": 0.3534, "step": 8117, "task_loss": 0.6909155249595642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.411400705575943, "epoch": 6.86, "learning_rate": 9.84180654510325e-07, "loss": 0.2929, "step": 8118, "task_loss": 0.8703902959823608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43736398220062256, "epoch": 6.86, "learning_rate": 9.781427363844947e-07, "loss": 0.4473, "step": 8119, "task_loss": 1.1147948503494263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32897982001304626, "epoch": 6.86, "learning_rate": 9.721048182586645e-07, "loss": 0.5436, "step": 8120, "task_loss": 0.6354244947433472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6896844506263733, "epoch": 6.86, "learning_rate": 9.660669001328342e-07, "loss": 0.4425, "step": 8121, "task_loss": 0.4681882858276367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43088117241859436, "epoch": 6.87, "learning_rate": 9.60028982007004e-07, "loss": 0.4394, "step": 8122, "task_loss": 0.5679709315299988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37356817722320557, "epoch": 6.87, "learning_rate": 9.539910638811739e-07, "loss": 0.3776, "step": 8123, "task_loss": 0.4733153283596039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.48914074897766113, "epoch": 6.87, "learning_rate": 9.479531457553436e-07, "loss": 0.439, "step": 8124, "task_loss": 0.8664729595184326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4317038655281067, "epoch": 6.87, "learning_rate": 9.419152276295133e-07, "loss": 0.394, "step": 8125, "task_loss": 0.9643426537513733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33526235818862915, "epoch": 6.87, "learning_rate": 9.358773095036832e-07, "loss": 0.409, "step": 8126, "task_loss": 0.9616798758506775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5133476257324219, "epoch": 6.87, "learning_rate": 9.29839391377853e-07, "loss": 0.4956, "step": 8127, "task_loss": 0.8181787133216858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25340792536735535, "epoch": 6.87, "learning_rate": 9.238014732520228e-07, "loss": 0.3446, "step": 8128, "task_loss": 0.19762665033340454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4637390971183777, "epoch": 6.87, "learning_rate": 9.177635551261925e-07, "loss": 0.4314, "step": 8129, "task_loss": 0.6076276302337646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.357305645942688, "epoch": 6.87, "learning_rate": 9.117256370003622e-07, "loss": 0.3837, "step": 8130, "task_loss": 0.07775256037712097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2515905499458313, "epoch": 6.87, "learning_rate": 9.056877188745322e-07, "loss": 0.3471, "step": 8131, "task_loss": 0.33892595767974854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.34571531414985657, "epoch": 6.87, "learning_rate": 8.996498007487019e-07, "loss": 0.4797, "step": 8132, "task_loss": 0.7883678674697876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5074376463890076, "epoch": 6.87, "learning_rate": 8.936118826228716e-07, "loss": 0.4433, "step": 8133, "task_loss": 0.3056316077709198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3160848319530487, "epoch": 6.88, "learning_rate": 8.875739644970415e-07, "loss": 0.4255, "step": 8134, "task_loss": 0.7935640215873718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45918622612953186, "epoch": 6.88, "learning_rate": 8.815360463712112e-07, "loss": 0.4804, "step": 8135, "task_loss": 0.5965117812156677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4346823990345001, "epoch": 6.88, "learning_rate": 8.754981282453811e-07, "loss": 0.3842, "step": 8136, "task_loss": 0.3502546548843384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6441003680229187, "epoch": 6.88, "learning_rate": 8.694602101195508e-07, "loss": 0.4987, "step": 8137, "task_loss": 0.683269739151001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5230835676193237, "epoch": 6.88, "learning_rate": 8.634222919937206e-07, "loss": 0.394, "step": 8138, "task_loss": 0.8928271532058716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2345597892999649, "epoch": 6.88, "learning_rate": 8.573843738678904e-07, "loss": 0.5523, "step": 8139, "task_loss": 0.22632645070552826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5438345074653625, "epoch": 6.88, "learning_rate": 8.513464557420601e-07, "loss": 0.3649, "step": 8140, "task_loss": 0.5837914347648621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23033744096755981, "epoch": 6.88, "learning_rate": 8.4530853761623e-07, "loss": 0.2821, "step": 8141, "task_loss": 0.10059036314487457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.285895437002182, "epoch": 6.88, "learning_rate": 8.392706194903998e-07, "loss": 0.4645, "step": 8142, "task_loss": 0.40806180238723755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4832836985588074, "epoch": 6.88, "learning_rate": 8.332327013645695e-07, "loss": 0.5162, "step": 8143, "task_loss": 0.6327180862426758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2849859297275543, "epoch": 6.88, "learning_rate": 8.271947832387393e-07, "loss": 0.3749, "step": 8144, "task_loss": 0.4231637418270111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37609267234802246, "epoch": 6.88, "learning_rate": 8.211568651129091e-07, "loss": 0.3352, "step": 8145, "task_loss": 0.5230594277381897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3001956343650818, "epoch": 6.89, "learning_rate": 8.151189469870788e-07, "loss": 0.393, "step": 8146, "task_loss": 0.5744699239730835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46324974298477173, "epoch": 6.89, "learning_rate": 8.090810288612487e-07, "loss": 0.3765, "step": 8147, "task_loss": 0.18940910696983337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25378215312957764, "epoch": 6.89, "learning_rate": 8.030431107354184e-07, "loss": 0.4326, "step": 8148, "task_loss": 0.46648675203323364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42185258865356445, "epoch": 6.89, "learning_rate": 7.970051926095884e-07, "loss": 0.5215, "step": 8149, "task_loss": 0.487506240606308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45335930585861206, "epoch": 6.89, "learning_rate": 7.90967274483758e-07, "loss": 0.4531, "step": 8150, "task_loss": 0.5737056732177734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.505012035369873, "epoch": 6.89, "learning_rate": 7.849293563579277e-07, "loss": 0.4273, "step": 8151, "task_loss": 1.5874508619308472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2771593928337097, "epoch": 6.89, "learning_rate": 7.788914382320975e-07, "loss": 0.4523, "step": 8152, "task_loss": 0.39473310112953186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40684062242507935, "epoch": 6.89, "learning_rate": 7.728535201062674e-07, "loss": 0.5218, "step": 8153, "task_loss": 0.23339396715164185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3783303201198578, "epoch": 6.89, "learning_rate": 7.668156019804372e-07, "loss": 0.3988, "step": 8154, "task_loss": 0.7179688215255737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3663947880268097, "epoch": 6.89, "learning_rate": 7.60777683854607e-07, "loss": 0.3217, "step": 8155, "task_loss": 0.32890433073043823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4665611982345581, "epoch": 6.89, "learning_rate": 7.547397657287768e-07, "loss": 0.4, "step": 8156, "task_loss": 1.0287481546401978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4335521459579468, "epoch": 6.89, "learning_rate": 7.487018476029465e-07, "loss": 0.4461, "step": 8157, "task_loss": 1.2561373710632324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.49254167079925537, "epoch": 6.9, "learning_rate": 7.426639294771163e-07, "loss": 0.4501, "step": 8158, "task_loss": 0.45394444465637207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3290424048900604, "epoch": 6.9, "learning_rate": 7.366260113512861e-07, "loss": 0.4876, "step": 8159, "task_loss": 0.36518895626068115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21469900012016296, "epoch": 6.9, "learning_rate": 7.30588093225456e-07, "loss": 0.4137, "step": 8160, "task_loss": 0.3176056742668152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.25622040033340454, "epoch": 6.9, "learning_rate": 7.245501750996257e-07, "loss": 0.3501, "step": 8161, "task_loss": 0.22923578321933746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3183137774467468, "epoch": 6.9, "learning_rate": 7.185122569737954e-07, "loss": 0.3315, "step": 8162, "task_loss": 0.4171198606491089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.213542640209198, "epoch": 6.9, "learning_rate": 7.124743388479653e-07, "loss": 0.4382, "step": 8163, "task_loss": 1.1673282384872437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37414249777793884, "epoch": 6.9, "learning_rate": 7.064364207221351e-07, "loss": 0.4184, "step": 8164, "task_loss": 0.7446993589401245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2703491449356079, "epoch": 6.9, "learning_rate": 7.003985025963048e-07, "loss": 0.2869, "step": 8165, "task_loss": 0.09363465011119843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3255982995033264, "epoch": 6.9, "learning_rate": 6.943605844704746e-07, "loss": 0.4884, "step": 8166, "task_loss": 1.2932459115982056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39657095074653625, "epoch": 6.9, "learning_rate": 6.883226663446444e-07, "loss": 0.3838, "step": 8167, "task_loss": 0.41045093536376953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7278048396110535, "epoch": 6.9, "learning_rate": 6.822847482188142e-07, "loss": 0.4919, "step": 8168, "task_loss": 1.0628044605255127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.612358808517456, "epoch": 6.9, "learning_rate": 6.762468300929839e-07, "loss": 0.5065, "step": 8169, "task_loss": 0.7821404337882996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3966567814350128, "epoch": 6.91, "learning_rate": 6.702089119671537e-07, "loss": 0.4038, "step": 8170, "task_loss": 0.31110134720802307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36263754963874817, "epoch": 6.91, "learning_rate": 6.641709938413236e-07, "loss": 0.4968, "step": 8171, "task_loss": 0.5597882270812988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29303961992263794, "epoch": 6.91, "learning_rate": 6.581330757154933e-07, "loss": 0.3814, "step": 8172, "task_loss": 0.6536696553230286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47450968623161316, "epoch": 6.91, "learning_rate": 6.520951575896631e-07, "loss": 0.3956, "step": 8173, "task_loss": 1.1226543188095093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5717104077339172, "epoch": 6.91, "learning_rate": 6.460572394638329e-07, "loss": 0.5377, "step": 8174, "task_loss": 1.3531861305236816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.67014479637146, "epoch": 6.91, "learning_rate": 6.400193213380027e-07, "loss": 0.44, "step": 8175, "task_loss": 1.3451555967330933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6766231656074524, "epoch": 6.91, "learning_rate": 6.339814032121725e-07, "loss": 0.4322, "step": 8176, "task_loss": 0.4496873915195465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4914029538631439, "epoch": 6.91, "learning_rate": 6.279434850863423e-07, "loss": 0.4695, "step": 8177, "task_loss": 1.003716230392456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.12936672568321228, "epoch": 6.91, "learning_rate": 6.219055669605121e-07, "loss": 0.482, "step": 8178, "task_loss": 0.40614667534828186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1550191342830658, "epoch": 6.91, "learning_rate": 6.158676488346818e-07, "loss": 0.474, "step": 8179, "task_loss": 0.10459624230861664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33134132623672485, "epoch": 6.91, "learning_rate": 6.098297307088516e-07, "loss": 0.3437, "step": 8180, "task_loss": 0.6766206622123718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.510519802570343, "epoch": 6.91, "learning_rate": 6.037918125830215e-07, "loss": 0.4822, "step": 8181, "task_loss": 0.8180686831474304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5164583325386047, "epoch": 6.92, "learning_rate": 5.977538944571912e-07, "loss": 0.5062, "step": 8182, "task_loss": 1.2069807052612305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.8733446598052979, "epoch": 6.92, "learning_rate": 5.91715976331361e-07, "loss": 0.5766, "step": 8183, "task_loss": 1.321340799331665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5231645107269287, "epoch": 6.92, "learning_rate": 5.856780582055307e-07, "loss": 0.3844, "step": 8184, "task_loss": 1.2551548480987549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37025097012519836, "epoch": 6.92, "learning_rate": 5.796401400797006e-07, "loss": 0.3229, "step": 8185, "task_loss": 0.1682988703250885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.46883317828178406, "epoch": 6.92, "learning_rate": 5.736022219538703e-07, "loss": 0.5129, "step": 8186, "task_loss": 1.0776530504226685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29069143533706665, "epoch": 6.92, "learning_rate": 5.675643038280401e-07, "loss": 0.4101, "step": 8187, "task_loss": 0.7024058699607849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4763966202735901, "epoch": 6.92, "learning_rate": 5.615263857022099e-07, "loss": 0.3963, "step": 8188, "task_loss": 0.6110609769821167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44962620735168457, "epoch": 6.92, "learning_rate": 5.554884675763797e-07, "loss": 0.4656, "step": 8189, "task_loss": 0.6429226994514465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.42734795808792114, "epoch": 6.92, "learning_rate": 5.494505494505495e-07, "loss": 0.3554, "step": 8190, "task_loss": 0.6999492049217224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3357987701892853, "epoch": 6.92, "learning_rate": 5.434126313247192e-07, "loss": 0.2665, "step": 8191, "task_loss": 0.29889100790023804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5168956518173218, "epoch": 6.92, "learning_rate": 5.37374713198889e-07, "loss": 0.5186, "step": 8192, "task_loss": 0.4611911177635193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21987923979759216, "epoch": 6.93, "learning_rate": 5.313367950730589e-07, "loss": 0.3691, "step": 8193, "task_loss": 0.5268322229385376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37954726815223694, "epoch": 6.93, "learning_rate": 5.252988769472287e-07, "loss": 0.4221, "step": 8194, "task_loss": 0.1988370418548584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6392261981964111, "epoch": 6.93, "learning_rate": 5.192609588213983e-07, "loss": 0.399, "step": 8195, "task_loss": 0.3287290930747986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41659075021743774, "epoch": 6.93, "learning_rate": 5.132230406955682e-07, "loss": 0.4101, "step": 8196, "task_loss": 0.6756597757339478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5132099986076355, "epoch": 6.93, "learning_rate": 5.07185122569738e-07, "loss": 0.3624, "step": 8197, "task_loss": 0.7284449338912964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.367946982383728, "epoch": 6.93, "learning_rate": 5.011472044439078e-07, "loss": 0.3967, "step": 8198, "task_loss": 0.4288441836833954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26173460483551025, "epoch": 6.93, "learning_rate": 4.951092863180775e-07, "loss": 0.3173, "step": 8199, "task_loss": 1.3075904846191406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28854647278785706, "epoch": 6.93, "learning_rate": 4.890713681922474e-07, "loss": 0.3417, "step": 8200, "task_loss": 0.1483665257692337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.376680850982666, "epoch": 6.93, "learning_rate": 4.830334500664171e-07, "loss": 0.4937, "step": 8201, "task_loss": 0.997186541557312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.23964419960975647, "epoch": 6.93, "learning_rate": 4.769955319405869e-07, "loss": 0.3447, "step": 8202, "task_loss": 1.1426169872283936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43199658393859863, "epoch": 6.93, "learning_rate": 4.7095761381475665e-07, "loss": 0.3444, "step": 8203, "task_loss": 0.79441899061203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.17806360125541687, "epoch": 6.93, "learning_rate": 4.649196956889265e-07, "loss": 0.2796, "step": 8204, "task_loss": 0.14915844798088074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2783088684082031, "epoch": 6.94, "learning_rate": 4.5888177756309626e-07, "loss": 0.3704, "step": 8205, "task_loss": 0.7095043063163757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.19664883613586426, "epoch": 6.94, "learning_rate": 4.528438594372661e-07, "loss": 0.3289, "step": 8206, "task_loss": 0.9144218564033508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44928330183029175, "epoch": 6.94, "learning_rate": 4.468059413114358e-07, "loss": 0.3919, "step": 8207, "task_loss": 0.4711969494819641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.20094677805900574, "epoch": 6.94, "learning_rate": 4.407680231856056e-07, "loss": 0.3666, "step": 8208, "task_loss": 0.016046574339270592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43739840388298035, "epoch": 6.94, "learning_rate": 4.347301050597754e-07, "loss": 0.4805, "step": 8209, "task_loss": 0.9702059030532837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.40838590264320374, "epoch": 6.94, "learning_rate": 4.286921869339452e-07, "loss": 0.4104, "step": 8210, "task_loss": 0.33884644508361816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6758958101272583, "epoch": 6.94, "learning_rate": 4.22654268808115e-07, "loss": 0.4581, "step": 8211, "task_loss": 1.032557487487793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21364089846611023, "epoch": 6.94, "learning_rate": 4.1661635068228475e-07, "loss": 0.3657, "step": 8212, "task_loss": 0.40058815479278564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.298591673374176, "epoch": 6.94, "learning_rate": 4.1057843255645453e-07, "loss": 0.4606, "step": 8213, "task_loss": 0.2349366694688797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3217827081680298, "epoch": 6.94, "learning_rate": 4.0454051443062436e-07, "loss": 0.3279, "step": 8214, "task_loss": 0.20146390795707703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.424287885427475, "epoch": 6.94, "learning_rate": 3.985025963047942e-07, "loss": 0.4528, "step": 8215, "task_loss": 1.092712163925171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41202759742736816, "epoch": 6.94, "learning_rate": 3.9246467817896386e-07, "loss": 0.336, "step": 8216, "task_loss": 0.33420389890670776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4061731994152069, "epoch": 6.95, "learning_rate": 3.864267600531337e-07, "loss": 0.4741, "step": 8217, "task_loss": 1.9225612878799438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33596500754356384, "epoch": 6.95, "learning_rate": 3.803888419273035e-07, "loss": 0.4856, "step": 8218, "task_loss": 1.0238655805587769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3063194751739502, "epoch": 6.95, "learning_rate": 3.7435092380147324e-07, "loss": 0.3719, "step": 8219, "task_loss": 0.2710898518562317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.43140900135040283, "epoch": 6.95, "learning_rate": 3.6831300567564307e-07, "loss": 0.5023, "step": 8220, "task_loss": 0.6899304986000061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44983047246932983, "epoch": 6.95, "learning_rate": 3.6227508754981285e-07, "loss": 0.4424, "step": 8221, "task_loss": 1.2916851043701172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4073679447174072, "epoch": 6.95, "learning_rate": 3.562371694239826e-07, "loss": 0.3701, "step": 8222, "task_loss": 1.1006274223327637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.47595617175102234, "epoch": 6.95, "learning_rate": 3.501992512981524e-07, "loss": 0.4143, "step": 8223, "task_loss": 0.5812637209892273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28813672065734863, "epoch": 6.95, "learning_rate": 3.441613331723222e-07, "loss": 0.3647, "step": 8224, "task_loss": 0.14159630239009857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.326826810836792, "epoch": 6.95, "learning_rate": 3.3812341504649196e-07, "loss": 0.408, "step": 8225, "task_loss": 0.08592489361763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.21296627819538116, "epoch": 6.95, "learning_rate": 3.320854969206618e-07, "loss": 0.3651, "step": 8226, "task_loss": 0.3518364429473877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.44234588742256165, "epoch": 6.95, "learning_rate": 3.2604757879483156e-07, "loss": 0.3826, "step": 8227, "task_loss": 0.8696706891059875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.13616123795509338, "epoch": 6.95, "learning_rate": 3.2000966066900134e-07, "loss": 0.2091, "step": 8228, "task_loss": 0.2655141353607178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26113781332969666, "epoch": 6.96, "learning_rate": 3.1397174254317117e-07, "loss": 0.4005, "step": 8229, "task_loss": 0.12295400351285934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.38352537155151367, "epoch": 6.96, "learning_rate": 3.079338244173409e-07, "loss": 0.4943, "step": 8230, "task_loss": 0.5141823291778564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3979910910129547, "epoch": 6.96, "learning_rate": 3.018959062915107e-07, "loss": 0.379, "step": 8231, "task_loss": 1.265740156173706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33287617564201355, "epoch": 6.96, "learning_rate": 2.958579881656805e-07, "loss": 0.3613, "step": 8232, "task_loss": 0.671626627445221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3962387442588806, "epoch": 6.96, "learning_rate": 2.898200700398503e-07, "loss": 0.3402, "step": 8233, "task_loss": 0.7090246677398682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5038058757781982, "epoch": 6.96, "learning_rate": 2.8378215191402006e-07, "loss": 0.428, "step": 8234, "task_loss": 0.9491145014762878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5114168524742126, "epoch": 6.96, "learning_rate": 2.7774423378818983e-07, "loss": 0.4507, "step": 8235, "task_loss": 0.7731209993362427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.13943323493003845, "epoch": 6.96, "learning_rate": 2.717063156623596e-07, "loss": 0.33, "step": 8236, "task_loss": 0.5297982096672058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4558562636375427, "epoch": 6.96, "learning_rate": 2.6566839753652944e-07, "loss": 0.3843, "step": 8237, "task_loss": 0.8994179368019104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5355358123779297, "epoch": 6.96, "learning_rate": 2.5963047941069916e-07, "loss": 0.4074, "step": 8238, "task_loss": 0.723736584186554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4044015407562256, "epoch": 6.96, "learning_rate": 2.53592561284869e-07, "loss": 0.3372, "step": 8239, "task_loss": 0.3826352655887604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3198971152305603, "epoch": 6.96, "learning_rate": 2.4755464315903877e-07, "loss": 0.4066, "step": 8240, "task_loss": 0.4775959849357605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6953800320625305, "epoch": 6.97, "learning_rate": 2.4151672503320855e-07, "loss": 0.4665, "step": 8241, "task_loss": 0.16682122647762299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.17449912428855896, "epoch": 6.97, "learning_rate": 2.3547880690737833e-07, "loss": 0.2643, "step": 8242, "task_loss": 0.1986398547887802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.37942612171173096, "epoch": 6.97, "learning_rate": 2.2944088878154813e-07, "loss": 0.3919, "step": 8243, "task_loss": 0.474393755197525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.32233500480651855, "epoch": 6.97, "learning_rate": 2.234029706557179e-07, "loss": 0.4255, "step": 8244, "task_loss": 0.49101269245147705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4243074357509613, "epoch": 6.97, "learning_rate": 2.173650525298877e-07, "loss": 0.4017, "step": 8245, "task_loss": 0.6890111565589905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.29664698243141174, "epoch": 6.97, "learning_rate": 2.113271344040575e-07, "loss": 0.4374, "step": 8246, "task_loss": 0.48036837577819824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.26391685009002686, "epoch": 6.97, "learning_rate": 2.0528921627822726e-07, "loss": 0.3012, "step": 8247, "task_loss": 0.5241109728813171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3508797585964203, "epoch": 6.97, "learning_rate": 1.992512981523971e-07, "loss": 0.4363, "step": 8248, "task_loss": 0.736172616481781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3754884600639343, "epoch": 6.97, "learning_rate": 1.9321338002656684e-07, "loss": 0.3232, "step": 8249, "task_loss": 0.7228637933731079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7203053832054138, "epoch": 6.97, "learning_rate": 1.8717546190073662e-07, "loss": 0.522, "step": 8250, "task_loss": 0.9883168935775757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3858627676963806, "epoch": 6.97, "learning_rate": 1.8113754377490642e-07, "loss": 0.3983, "step": 8251, "task_loss": 0.3175954818725586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.41841402649879456, "epoch": 6.97, "learning_rate": 1.750996256490762e-07, "loss": 0.5062, "step": 8252, "task_loss": 1.2561511993408203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33357691764831543, "epoch": 6.98, "learning_rate": 1.6906170752324598e-07, "loss": 0.3777, "step": 8253, "task_loss": 0.8780134916305542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.28846055269241333, "epoch": 6.98, "learning_rate": 1.6302378939741578e-07, "loss": 0.5101, "step": 8254, "task_loss": 0.4271152913570404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4990662932395935, "epoch": 6.98, "learning_rate": 1.5698587127158559e-07, "loss": 0.4595, "step": 8255, "task_loss": 0.9109876751899719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33004283905029297, "epoch": 6.98, "learning_rate": 1.5094795314575536e-07, "loss": 0.3308, "step": 8256, "task_loss": 0.7639520764350891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.39072078466415405, "epoch": 6.98, "learning_rate": 1.4491003501992514e-07, "loss": 0.3345, "step": 8257, "task_loss": 0.36566340923309326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3156832456588745, "epoch": 6.98, "learning_rate": 1.3887211689409492e-07, "loss": 0.3101, "step": 8258, "task_loss": 0.8746612668037415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5381653308868408, "epoch": 6.98, "learning_rate": 1.3283419876826472e-07, "loss": 0.5419, "step": 8259, "task_loss": 2.124971389770508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5853549242019653, "epoch": 6.98, "learning_rate": 1.267962806424345e-07, "loss": 0.4934, "step": 8260, "task_loss": 0.6233627200126648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4489282965660095, "epoch": 6.98, "learning_rate": 1.2075836251660427e-07, "loss": 0.4431, "step": 8261, "task_loss": 0.8354801535606384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5420633554458618, "epoch": 6.98, "learning_rate": 1.1472044439077406e-07, "loss": 0.4513, "step": 8262, "task_loss": 0.8065614104270935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.36937177181243896, "epoch": 6.98, "learning_rate": 1.0868252626494385e-07, "loss": 0.3463, "step": 8263, "task_loss": 0.12614281475543976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.6994350552558899, "epoch": 6.99, "learning_rate": 1.0264460813911363e-07, "loss": 0.4711, "step": 8264, "task_loss": 0.7263739705085754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.20920465886592865, "epoch": 6.99, "learning_rate": 9.660669001328342e-08, "loss": 0.4223, "step": 8265, "task_loss": 1.0424264669418335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.17900460958480835, "epoch": 6.99, "learning_rate": 9.056877188745321e-08, "loss": 0.3473, "step": 8266, "task_loss": 0.41000494360923767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5167355537414551, "epoch": 6.99, "learning_rate": 8.453085376162299e-08, "loss": 0.4375, "step": 8267, "task_loss": 1.25648033618927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.45862525701522827, "epoch": 6.99, "learning_rate": 7.849293563579279e-08, "loss": 0.4071, "step": 8268, "task_loss": 1.5106650590896606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2271057665348053, "epoch": 6.99, "learning_rate": 7.245501750996257e-08, "loss": 0.3145, "step": 8269, "task_loss": 0.04377155750989914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.7007173895835876, "epoch": 6.99, "learning_rate": 6.641709938413236e-08, "loss": 0.5107, "step": 8270, "task_loss": 0.24144020676612854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4143049418926239, "epoch": 6.99, "learning_rate": 6.037918125830214e-08, "loss": 0.4022, "step": 8271, "task_loss": 0.9066441059112549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.5543140172958374, "epoch": 6.99, "learning_rate": 5.434126313247193e-08, "loss": 0.464, "step": 8272, "task_loss": 0.48787549138069153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.287723183631897, "epoch": 6.99, "learning_rate": 4.830334500664171e-08, "loss": 0.3819, "step": 8273, "task_loss": 0.35248225927352905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.284237265586853, "epoch": 6.99, "learning_rate": 4.2265426880811495e-08, "loss": 0.4122, "step": 8274, "task_loss": 0.2473331242799759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.33322736620903015, "epoch": 6.99, "learning_rate": 3.6227508754981285e-08, "loss": 0.3522, "step": 8275, "task_loss": 0.7891225814819336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.4246176779270172, "epoch": 7.0, "learning_rate": 3.018959062915107e-08, "loss": 0.4606, "step": 8276, "task_loss": 0.9739570617675781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.22504520416259766, "epoch": 7.0, "learning_rate": 2.4151672503320856e-08, "loss": 0.3732, "step": 8277, "task_loss": 1.023877501487732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.3508482277393341, "epoch": 7.0, "learning_rate": 1.8113754377490642e-08, "loss": 0.3548, "step": 8278, "task_loss": 0.9861854314804077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.2988832890987396, "epoch": 7.0, "learning_rate": 1.2075836251660428e-08, "loss": 0.3086, "step": 8279, "task_loss": 0.17360980808734894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.577366828918457, "epoch": 7.0, "learning_rate": 6.037918125830214e-09, "loss": 0.4192, "step": 8280, "task_loss": 0.20891474187374115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.386257320484995, "compression/movement_sparsity/model_sparsity": 0.3729881932598306, "compression_loss": 0.0, "distillation_loss": 0.1905464082956314, "epoch": 7.0, "learning_rate": 0.0, "loss": 0.3376, "step": 8281, "task_loss": 0.2361963987350464 }, { "epoch": 7.0, "step": 8281, "total_flos": 4.176434448946852e+19, "train_loss": 18.350530295344083, "train_runtime": 37482.3852, "train_samples_per_second": 14.147, "train_steps_per_second": 0.221 } ], "max_steps": 8281, "num_train_epochs": 7, "total_flos": 4.176434448946852e+19, "trial_name": null, "trial_params": null }